Spaces:
Running
Running
shounakpaul95
committed on
Commit
•
52aa0e7
1
Parent(s):
c00c6b5
Update eval_utils.py
Browse files
- eval_utils.py +5 -2
eval_utils.py
CHANGED
@@ -65,16 +65,19 @@ def evaluate_cjpe(gold_data, pred_data):
|
|
65 |
R = []
|
66 |
B = []
|
67 |
rl_evaluator = rouge.Rouge(metrics=['rouge-l'], max_n=2, limit_length=False, apply_avg=True)
|
68 |
-
for x in
|
69 |
gold_explanations = []
|
70 |
pred_explanations = []
|
71 |
for k,v in gold_data['explanation'].items():
|
72 |
gold_explanations.append(v[f'expert_{x}'])
|
73 |
pred_explanations.append(pred_data['explanation'][k])
|
|
|
74 |
rougex = rl_evaluator.get_scores(pred_explanations, gold_explanations)['rouge-l']['f']
|
75 |
bleux = get_BLEU_score(gold_explanations, pred_explanations)
|
76 |
R.append(rougex)
|
77 |
B.append(bleux)
|
|
|
|
|
78 |
|
79 |
rouge_score = sum(R)/len(R)
|
80 |
bleu_score = sum(B)/len(B)
|
@@ -214,7 +217,7 @@ def evaluate_pcr(gold_data, pred_data):
|
|
214 |
f1_scores = []
|
215 |
for k in range(1, 21):
|
216 |
correct, gold_total, pred_total = 0, 0, 0
|
217 |
-
for id, gold_candidates in gold_data.items():
|
218 |
pred_candidates = pred_data.get(id, [])
|
219 |
gold_candidates = [c for c in gold_candidates if c != id]
|
220 |
pred_candidates = [c for c in pred_candidates if c != id]
|
|
|
65 |
R = []
|
66 |
B = []
|
67 |
rl_evaluator = rouge.Rouge(metrics=['rouge-l'], max_n=2, limit_length=False, apply_avg=True)
|
68 |
+
for x in range(1, 6):
|
69 |
gold_explanations = []
|
70 |
pred_explanations = []
|
71 |
for k,v in gold_data['explanation'].items():
|
72 |
gold_explanations.append(v[f'expert_{x}'])
|
73 |
pred_explanations.append(pred_data['explanation'][k])
|
74 |
+
print("Metrics for expert", x, "...", end=' ')
|
75 |
rougex = rl_evaluator.get_scores(pred_explanations, gold_explanations)['rouge-l']['f']
|
76 |
bleux = get_BLEU_score(gold_explanations, pred_explanations)
|
77 |
R.append(rougex)
|
78 |
B.append(bleux)
|
79 |
+
print("Done.")
|
80 |
+
|
81 |
|
82 |
rouge_score = sum(R)/len(R)
|
83 |
bleu_score = sum(B)/len(B)
|
|
|
217 |
f1_scores = []
|
218 |
for k in range(1, 21):
|
219 |
correct, gold_total, pred_total = 0, 0, 0
|
220 |
+
for id, gold_candidates in tqdm(gold_data.items(), desc="pcr"):
|
221 |
pred_candidates = pred_data.get(id, [])
|
222 |
gold_candidates = [c for c in gold_candidates if c != id]
|
223 |
pred_candidates = [c for c in pred_candidates if c != id]
|