narcolepticchicken committed on
Commit
b4d00e5
·
verified ·
1 Parent(s): 30c4069

Upload benchmarks/benchmark_retrieval_qa.py

Browse files
benchmarks/benchmark_retrieval_qa.py CHANGED
@@ -407,6 +407,8 @@ class RetrievalQABenchmark:
407
  n = len(results)
408
  correct = sum(1 for r in results if r["correct"])
409
  abstained = sum(1 for r in results if r.get("abstained", False))
 
 
410
  correct_abstentions = sum(
411
  1 for i in unanswerable_qs if results[i].get("abstained", False)
412
  )
 
407
  n = len(results)
408
  correct = sum(1 for r in results if r["correct"])
409
  abstained = sum(1 for r in results if r.get("abstained", False))
410
+ # Count abstentions properly
411
+ unanswerable_qs = [i for i, r in enumerate(results) if self.questions[i].is_unanswerable]
412
  correct_abstentions = sum(
413
  1 for i in unanswerable_qs if results[i].get("abstained", False)
414
  )