Upload benchmarks/benchmark_retrieval_qa.py
Browse files
benchmarks/benchmark_retrieval_qa.py
CHANGED
|
@@ -407,6 +407,8 @@ class RetrievalQABenchmark:
|
|
| 407 |
n = len(results)
|
| 408 |
correct = sum(1 for r in results if r["correct"])
|
| 409 |
abstained = sum(1 for r in results if r.get("abstained", False))
|
|
|
|
|
|
|
| 410 |
correct_abstentions = sum(
|
| 411 |
1 for i in unanswerable_qs if results[i].get("abstained", False)
|
| 412 |
)
|
|
|
|
| 407 |
n = len(results)
|
| 408 |
correct = sum(1 for r in results if r["correct"])
|
| 409 |
abstained = sum(1 for r in results if r.get("abstained", False))
|
| 410 |
+
# Count abstentions properly
|
| 411 |
+
unanswerable_qs = [i for i, r in enumerate(results) if self.questions[i].is_unanswerable]
|
| 412 |
correct_abstentions = sum(
|
| 413 |
1 for i in unanswerable_qs if results[i].get("abstained", False)
|
| 414 |
)
|