Spaces:
Running
Running
natolambert
committed on
Commit
•
54b0338
1
Parent(s):
87b1f9b
mix averaging bug
Browse files
- app.py +1 -1
- src/constants.py +5 -5
app.py
CHANGED
@@ -51,7 +51,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
51 |
for subset, sub_subsets in subset_mapping.items():
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
-
sub_counts = [example_counts[s] for s in
|
55 |
new_df[subset] = np.average(sub_data, axis=1, weights=sub_counts) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
|
|
51 |
for subset, sub_subsets in subset_mapping.items():
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
+
sub_counts = [example_counts[s] for s in subset_cols] # take the example counts
|
55 |
new_df[subset] = np.average(sub_data, axis=1, weights=sub_counts) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
src/constants.py
CHANGED
@@ -51,10 +51,10 @@ example_counts = {
|
|
51 |
"hep-rust": 164
|
52 |
}
|
53 |
|
|
|
54 |
subset_mapping = {
|
55 |
-
"Chat": [
|
56 |
-
"Chat Hard": [
|
57 |
-
"Safety": [
|
58 |
-
"Reasoning": ["math-prm"
|
59 |
-
"hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
|
60 |
}
|
|
|
51 |
"hep-rust": 164
|
52 |
}
|
53 |
|
54 |
+
# note, this order should match the dataframe.
|
55 |
subset_mapping = {
|
56 |
+
"Chat": ['alpacaeval-easy', 'alpacaeval-hard', 'alpacaeval-length', 'mt-bench-easy', 'mt-bench-med'],
|
57 |
+
"Chat Hard": ['llmbar-adver-GPTInst', 'llmbar-adver-GPTOut', 'llmbar-adver-manual', 'llmbar-adver-neighbor', 'llmbar-natural', 'mt-bench-hard'],
|
58 |
+
"Safety": ['donotanswer', 'refusals-dangerous', 'refusals-offensive', 'xstest-should-refuse', 'xstest-should-respond'],
|
59 |
+
"Reasoning": ["hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust", "math-prm"]
|
|
|
60 |
}
|