natolambert committed • Commit b64c62d • 1 Parent(s): 45dc572

add contamination note

Files changed:
- src/md.py (+3 -1)
- src/rm-training-data.csv (+36 -0)
- src/utils.py (+14 -3)
src/md.py
CHANGED
@@ -95,5 +95,7 @@ For more details, see the [dataset](https://huggingface.co/datasets/allenai/rewa
 TOP_TEXT = """
 # RewardBench: Evaluating Reward Models
 ### Evaluating the capabilities, safety, and pitfalls of reward models
-[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {} | * Unverified models
+[Code](https://github.com/allenai/reward-bench) | [Eval. Dataset](https://huggingface.co/datasets/allenai/reward-bench) | [Prior Test Sets](https://huggingface.co/datasets/allenai/pref-test-sets) | [Results](https://huggingface.co/datasets/allenai/reward-bench-results) | [Paper](https://arxiv.org/abs/2403.13787) | Total models: {} | * Unverified models | ⚠️ Dataset Contamination
+
+⚠️ Many of the top models were trained on unintentionally contaminated, AI-generated data ([Skywork/Skywork-Reward-Preference-80K-v0.1](https://huggingface.co/datasets/Skywork/Skywork-Reward-Preference-80K-v0.1)); for more information, see this [gist](https://gist.github.com/natolambert/1aed306000c13e0e8c5bc17c1a5dd300).
 """
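For context on how the banner text reaches the page: TOP_TEXT is an ordinary Python format string with a single `{}` placeholder for the model count. A minimal sketch of rendering it, assuming src/md.py is importable from the repo root; the render_header helper is hypothetical, not the Space's actual rendering code:

```python
# Minimal sketch (hypothetical helper), assuming repo-root imports.
from src.md import TOP_TEXT

def render_header(total_models: int) -> str:
    # TOP_TEXT contains exactly one "{}" ("Total models: {}"),
    # so a single positional argument fills it.
    return TOP_TEXT.format(total_models)

print(render_header(150))  # illustrative count, not a real number
```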
src/rm-training-data.csv
ADDED
@@ -0,0 +1,36 @@
+Reward Model,Preference Datasets Used
+RLHFlow/ArmoRM-Llama3-8B-v0.1,"HelpSteer, UltraFeedback, BeaverTails, Argilla-Capybara, Argilla-Math-Preferences, CodeUltraFeedback, Argilla-OpenOrca"
+RLHFlow/pair-preference-model-LLaMA3-8B,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
+sfairXC/FsfairX-LLaMA3-RM-v0.1,"Filtered HH-RLHF, SHP, HelpSteer, SafeRLHF-30k, UltraFeedback, UltraInteract, CodeUltraFeedback, Argilla-Math, OpenOrca, Capybara"
+openbmb/Eurus-RM-7b,"UltraInteract, UltraFeedback, UltraSafety"
+Nexusflow/Starling-RM-34B,Nectar
+weqweasdas/RM-Mistral-7B,"HH-RLHF, Capybara, Orca, SHP, UltraFeedback, HelpSteer, PKU-SafeRLHF, PKU-SafeRLHF-30k"
+hendrydong/Mistral-RM-for-RAFT-GSHF-v0,Undisclosed
+stabilityai/stablelm-2-12b-chat,"HH-RLHF, argilla/dpo-mix-7k, and other Undisclosed"
+Ray2333/reward-model-Mistral-7B-instruct...,"Summarize, WebGPT, Dahoas/instruct-synthetic-prompt-responses, HH-RLHF, ChatBotArena Conversations, UltraFeedback, Nectar"
+allenai/tulu-2-dpo-70b,UltraFeedback
+meta-llama/Meta-Llama-3-70B-Instruct,Undisclosed
+prometheus-eval/prometheus-8x7b-v2.0,Preference Collection (relabeled mix)
+NousResearch/Nous-Hermes-2-Mistral-7B-DPO,Undisclosed
+mistralai/Mixtral-8x7B-Instruct-v0.1,Undisclosed
+upstage/SOLAR-10.7B-Instruct-v1.0,"OpenOrca, Intel-Orca, UltraFeedback"
+HuggingFaceH4/zephyr-7b-alpha,UltraFeedback
+allenai/tulu-2-dpo-13b,UltraFeedback
+0-hero/Matter-0.1-7B-boost-DPO-preview,Undisclosed
+prometheus-eval/prometheus-7b-v2.0,Preference Collection (relabeled mix)
+HuggingFaceH4/starchat2-15b-v0.1,"UltraFeedback, Orca"
+HuggingFaceH4/zephyr-7b-beta,UltraFeedback
+allenai/tulu-2-dpo-7b,UltraFeedback
+jondurbin/bagel-dpo-34b-v0.5,"Airoboros 3.2, Contextual DPO, HelpSteer, Orca, Gutenberg-DPO, Python DPO, Toxic DPO, Truthy, UltraFeedback"
+berkeley-nest/Starling-RM-7B-alpha,Nectar
+NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,Undisclosed
+0-hero/Matter-0.1-7B-DPO-preview,Undisclosed
+stabilityai/stablelm-zephyr-3b,"UltraFeedback, Orca"
+Qwen/Qwen1.5-14B-Chat,Undisclosed
+CohereForAI/c4ai-command-r-plus,Undisclosed
+OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5,"WebGPT, HH-RLHF, SHP, Summarize"
+Qwen/Qwen1.5-7B-Chat,Undisclosed
+weqweasdas/RM-Gemma-7B,"HH-RLHF, SHP, UltraFeedback, Capybara, HelpSteer, Orca"
+openbmb/Eurus-7b-kto,"UltraInteract, UltraFeedback"
+Qwen/Qwen1.5-72B-Chat,Undisclosed
+openbmb/UltraRM-13b,"UltraFeedback, HH-RLHF, SHP, Summarize"
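Nothing in this commit wires the CSV into the app, so how (or whether) it is consumed is an open question. As a sketch, the hypothetical helper below loads each row into a model → datasets mapping with the standard csv module:

```python
import csv

# Hypothetical helper: this commit only adds the CSV; consuming it
# like this is an assumption, not code from the Space.
def load_rm_training_data(path: str = "src/rm-training-data.csv") -> dict[str, list[str]]:
    with open(path, newline="", encoding="utf-8") as f:
        rows = csv.DictReader(f)  # header: "Reward Model,Preference Datasets Used"
        return {
            row["Reward Model"]: [d.strip() for d in row["Preference Datasets Used"].split(",")]
            for row in rows
        }

data = load_rm_training_data()
print(data["openbmb/UltraRM-13b"])  # ['UltraFeedback', 'HH-RLHF', 'SHP', 'Summarize']
```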
src/utils.py
CHANGED
@@ -18,6 +18,16 @@ UNVERIFIED_MODELS = [
     "Salesforce/SFR-LLaMa-3.1-70B-Judge-r",
     "Salesforce/SFR-nemo-12B-Judge-r",
     "Salesforce/SFR-LLaMa-3.1-8B-Judge-r",
+    "SF-Foundation/TextEval-OffsetBias-12B",
+    "SF-Foundation/TextEval-Llama3.1-70B",
+]
+
+CONTAMINATED_MODELS = [
+    "Skywork/Skywork-Reward-Gemma-2-27B",
+    "Skywork/Skywork-Critic-Llama-3.1-70B",
+    "LxzGordon/URM-LLaMa-3.1-8B",
+    "Skywork/Skywork-Reward-Llama-3.1-8B",
+    "Ray2333/GRM-Llama3-8B-rewardmodel-ft",
 ]
 
 # From Open LLM Leaderboard
@@ -40,9 +50,10 @@ def model_hyperlink(link, model_name):
     output = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 
     if model_name in UNVERIFIED_MODELS:
-        output += " *"
-
-    return output
+        output += " *"
+    if model_name in CONTAMINATED_MODELS:
+        output += " ⚠️"
+    return output
 
 def undo_hyperlink(html_string):
     # Regex pattern to match content inside > and <
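A quick usage sketch of the updated marker logic, assuming the Space is run from the repo root so src.utils is importable (output abbreviated):

```python
from src.utils import model_hyperlink  # assumes repo-root imports

# Contaminated models keep their hyperlink and gain a trailing ⚠️;
# unverified models gain a trailing * instead.
html = model_hyperlink(
    "https://huggingface.co/Skywork/Skywork-Reward-Llama-3.1-8B",
    "Skywork/Skywork-Reward-Llama-3.1-8B",
)
print(html)  # <a ...>Skywork/Skywork-Reward-Llama-3.1-8B</a> ⚠️
```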