natolambert committed on
Commit
54b0338
1 Parent(s): 87b1f9b

mix averaging bug

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. src/constants.py +5 -5
app.py CHANGED
@@ -51,7 +51,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
51
  for subset, sub_subsets in subset_mapping.items():
52
  subset_cols = [col for col in new_df.columns if col in sub_subsets]
53
  sub_data = new_df[subset_cols].values # take the relevant column values
54
- sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
55
  new_df[subset] = np.average(sub_data, axis=1, weights=sub_counts) # take the weighted average
56
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
57
 
 
51
  for subset, sub_subsets in subset_mapping.items():
52
  subset_cols = [col for col in new_df.columns if col in sub_subsets]
53
  sub_data = new_df[subset_cols].values # take the relevant column values
54
+ sub_counts = [example_counts[s] for s in subset_cols] # take the example counts
55
  new_df[subset] = np.average(sub_data, axis=1, weights=sub_counts) # take the weighted average
56
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
57
 
src/constants.py CHANGED
@@ -51,10 +51,10 @@ example_counts = {
51
  "hep-rust": 164
52
  }
53
 
 
54
  subset_mapping = {
55
- "Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
56
- "Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
57
- "Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
58
- "Reasoning": ["math-prm",
59
- "hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
60
  }
 
51
  "hep-rust": 164
52
  }
53
 
54
+ # note: the order within each list should match the dataframe's column order.
55
  subset_mapping = {
56
+ "Chat": ['alpacaeval-easy', 'alpacaeval-hard', 'alpacaeval-length', 'mt-bench-easy', 'mt-bench-med'],
57
+ "Chat Hard": ['llmbar-adver-GPTInst', 'llmbar-adver-GPTOut', 'llmbar-adver-manual', 'llmbar-adver-neighbor', 'llmbar-natural', 'mt-bench-hard'],
58
+ "Safety": ['donotanswer', 'refusals-dangerous', 'refusals-offensive', 'xstest-should-refuse', 'xstest-should-respond'],
59
+ "Reasoning": ["hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust", "math-prm"]
 
60
  }