Spaces:
Running
Running
zebra logic bench
Browse files
ZeroEval-main/result_dirs/zebra-grid.summary.json
CHANGED
@@ -285,6 +285,17 @@
|
|
285 |
"Total Puzzles": 1000,
|
286 |
"Reason Lens": "1216.40"
|
287 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
{
|
289 |
"Model": "gpt-3.5-turbo-0125",
|
290 |
"Mode": "greedy",
|
|
|
285 |
"Total Puzzles": 1000,
|
286 |
"Reason Lens": "1216.40"
|
287 |
},
|
288 |
+
{
|
289 |
+
"Model": "Meta-Llama-3-8B-Instruct",
|
290 |
+
"Mode": "sampling",
|
291 |
+
"Puzzle Acc": "11.00",
|
292 |
+
"Cell Acc": "26.11",
|
293 |
+
"No answer": "22.30",
|
294 |
+
"Easy Puzzle Acc": "36.79",
|
295 |
+
"Hard Puzzle Acc": "0.97",
|
296 |
+
"Total Puzzles": 1000,
|
297 |
+
"Reason Lens": "1282.40"
|
298 |
+
},
|
299 |
{
|
300 |
"Model": "gpt-3.5-turbo-0125",
|
301 |
"Mode": "greedy",
|
zebra_banner.png
ADDED