natolambert committed on
Commit
b64c62d
1 Parent(s): 45dc572

add contamination note

Browse files
Files changed (3) hide show
  1. src/md.py +3 -1
  2. src/rm-training-data.csv +36 -0
  3. src/utils.py +14 -3
src/md.py CHANGED
@@ -95,5 +95,7 @@ For more details, see the [dataset](https://huggingface.co/datasets/allenai/rewa
95
  TOP_TEXT = """
96
  # RewardBench: Evaluating Reward Models
97
  ### Evaluating the capabilities, safety, and pitfalls of reward models
98
- [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {} | * Unverified models
 
 
99
  """
 
95
  TOP_TEXT = """
96
  # RewardBench: Evaluating Reward Models
97
  ### Evaluating the capabilities, safety, and pitfalls of reward models
98
+ [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {} | * Unverified models | ⚠️ Dataset Contamination
99
+
100
+ ⚠️ Many of the top models were trained on unintentionally contaminated, AI-generated data ([Skywork/Skywork-Reward-Preference-80K-v0.1](https://huggingface.co/datasets/Skywork/Skywork-Reward-Preference-80K-v0.1)), for more information, see this [gist](https://gist.github.com/natolambert/1aed306000c13e0e8c5bc17c1a5dd300).
101
  """
src/rm-training-data.csv ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Reward Model,Preference Datasets Used
2
+ RLHFlow/ArmoRM-Llama3-8B-v0.1,"HelpSteer, UltraFeedback, BeaverTails, Argilla-Capybara, Argilla-Math-Preferences, CodeUltraFeedback, Argilla-OpenOrca"
3
+ RLHFlow/pair-preference-model-LLaMA3-8B,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
4
+ sfairXC/FsfairX-LLaMA3-RM-v0.1,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
5
+ openbmb/Eurus-RM-7b,"UltraInteract, UltraFeedback, UltraSafety"
6
+ Nexusflow/Starling-RM-34B,Nectar
7
+ weqweasdas/RM-Mistral-7B,"HH-RLHF, Capybara, Orca, SHP, UltraFeedback, HelpSteer, PKU-SafeRLHF, PKU-SafeRLHF-30k"
8
+ hendrydong/Mistral-RM-for-RAFT-GSHF-v0,Undisclosed
9
+ stabilityai/stablelm-2-12b-chat,"HH-RLHF, argilla/dpo-mix-7k, and other Undisclosed"
10
+ Ray2333/reward-model-Mistral-7B-instruct...,"Summarize, WebGPT, Dahoas/instruct-synthetic-prompt-responses, HH-RLHF, ChatBotArena Conversations, UltraFeedback, Nectar"
11
+ allenai/tulu-2-dpo-70b,UltraFeedback
12
+ meta-llama/Meta-Llama-3-70B-Instruct,Undisclosed
13
+ prometheus-eval/prometheus-8x7b-v2.0,Preference Collection (relabeled mix)
14
+ NousResearch/Nous-Hermes-2-Mistral-7B-DPO,Undisclosed
15
+ mistralai/Mixtral-8x7B-Instruct-v0.1,Undisclosed
16
+ upstage/SOLAR-10.7B-Instruct-v1.0,"OpenOrca, Intel-Orca, UltraFeedback"
17
+ HuggingFaceH4/zephyr-7b-alpha,UltraFeedback
18
+ allenai/tulu-2-dpo-13b,UltraFeedback
19
+ 0-hero/Matter-0.1-7B-boost-DPO-preview,Undisclosed
20
+ prometheus-eval/prometheus-7b-v2.0,Preference Collection (relabeled mix)
21
+ HuggingFaceH4/starchat2-15b-v0.1,"UltraFeedback, Orca"
22
+ HuggingFaceH4/zephyr-7b-beta,UltraFeedback
23
+ allenai/tulu-2-dpo-7b,UltraFeedback
24
+ jondurbin/bagel-dpo-34b-v0.5,"Airoboros 3.2, Contextual DPO, HelpSteer, Orca, Gutenberg-DPO, Python DPO, Toxic DPO, Truthy, UltraFeedback"
25
+ berkeley-nest/Starling-RM-7B-alpha,Nectar
26
+ NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,Undisclosed
27
+ 0-hero/Matter-0.1-7B-DPO-preview,Undisclosed
28
+ stabilityai/stablelm-zephyr-3b,"UltraFeedback, Orca"
29
+ Qwen/Qwen1.5-14B-Chat,Undisclosed
30
+ CohereForAI/c4ai-command-r-plus,Undisclosed
31
+ OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5,"WebGPT, HH-RLHF, SHP, Summarize"
32
+ Qwen/Qwen1.5-7B-Chat,Undisclosed
33
+ weqweasdas/RM-Gemma-7B,"HH-RLHF, SHP, UltraFeedback, Capybara, HelpSteer, Orca"
34
+ openbmb/Eurus-7b-kto,"UltraInteract, UltraFeedback"
35
+ Qwen/Qwen1.5-72B-Chat,Undisclosed
36
+ openbmb/UltraRM-13b,"UltraFeedback, HH-RLHF, SHP, Summarize"
src/utils.py CHANGED
@@ -18,6 +18,16 @@ UNVERIFIED_MODELS = [
18
  "Salesforce/SFR-LLaMa-3.1-70B-Judge-r",
19
  "Salesforce/SFR-nemo-12B-Judge-r",
20
  "Salesforce/SFR-LLaMa-3.1-8B-Judge-r",
 
 
 
 
 
 
 
 
 
 
21
  ]
22
 
23
  # From Open LLM Leaderboard
@@ -40,9 +50,10 @@ def model_hyperlink(link, model_name):
40
  output = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
41
 
42
  if model_name in UNVERIFIED_MODELS:
43
- return output + " *"
44
- else:
45
- return output
 
46
 
47
  def undo_hyperlink(html_string):
48
  # Regex pattern to match content inside > and <
 
18
  "Salesforce/SFR-LLaMa-3.1-70B-Judge-r",
19
  "Salesforce/SFR-nemo-12B-Judge-r",
20
  "Salesforce/SFR-LLaMa-3.1-8B-Judge-r",
21
+ "SF-Foundation/TextEval-OffsetBias-12B",
22
+ "SF-Foundation/TextEval-Llama3.1-70B",
23
+ ]
24
+
25
+ CONTAMINATED_MODELS = [
26
+ "Skywork/Skywork-Reward-Gemma-2-27B",
27
+ "Skywork/Skywork-Critic-Llama-3.1-70B",
28
+ "LxzGordon/URM-LLaMa-3.1-8B",
29
+ "Skywork/Skywork-Reward-Llama-3.1-8B",
30
+ "Ray2333/GRM-Llama3-8B-rewardmodel-ft",
31
  ]
32
 
33
  # From Open LLM Leaderboard
 
50
  output = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
51
 
52
  if model_name in UNVERIFIED_MODELS:
53
+ output += " *"
54
+ if model_name in CONTAMINATED_MODELS:
55
+ output += " ⚠️"
56
+ return output
57
 
58
  def undo_hyperlink(html_string):
59
  # Regex pattern to match content inside > and <