meg-huggingface committed
Commit 130a6d2
1 Parent(s): 1075b83
Files changed (3)
  1. app.py +20 -15
  2. src/about.py +6 -4
  3. src/display/utils.py +2 -1
app.py CHANGED
@@ -156,21 +156,26 @@ with demo:
                             elem_id="search-bar",
                         )
                     with gr.Row():
-                        shown_columns = gr.CheckboxGroup(
-                            choices=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if c.displayed_by_default and not c.hidden and not c.never_hidden and not c.advanced and not c.dummy
-                            ],
-                            value=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if c.displayed_by_default and not c.hidden and not c.never_hidden and not c.advanced
-                            ],
-                            label="Select metrics to show",
-                            elem_id="column-select",
-                            interactive=True,
-                        )
+                        with gr.Column():
+                            shown_columns = gr.CheckboxGroup(
+                                choices=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if c.displayed_by_default and not c.hidden and not c.never_hidden and not c.advanced and not c.dummy
+                                ],
+                                value=[
+                                    c.name
+                                    for c in fields(AutoEvalColumn)
+                                    if c.displayed_by_default and not c.hidden and not c.never_hidden and not c.advanced
+                                ],
+                                label="Select metrics to show",
+                                elem_id="column-select",
+                                interactive=True,
+                            )
+                        with gr.Column():
+                            for c in fields(AutoEvalColumn):
+                                if c.displayed_by_default and not c.hidden and not c.never_hidden and not c.advanced and not c.dummy:
+                                    gr.Markdown("**" + c.name + "**. " + c.cite, elem_classes="markdown-text")
                     with gr.Row():
                         with gr.Accordion("Advanced options [WIP]", open=False):
                             shown_columns_advanced = gr.CheckboxGroup(
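For context, the new right-hand column simply renders one Markdown line per displayed metric next to the existing CheckboxGroup. Below is a minimal, self-contained Gradio sketch of that layout, not the Space's actual app.py; the metrics list is a stand-in for the AutoEvalColumn fields used above.

import gradio as gr

# Stand-in for the (name, cite) pairs that app.py reads from fields(AutoEvalColumn).
metrics = [
    ("Toxicity (lower is better)", "_ToxiGen._ Hartvigsen et al., ACL 2022."),
    ("ANLI", "_Adversarial NLI._ Nie et al., ACL 2020."),
    ("LogiQA", "_LogiQA._ Liu et al., IJCAI 2020."),
]

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Left column: the metric selector, as in the hunk above.
            shown_columns = gr.CheckboxGroup(
                choices=[name for name, _ in metrics],
                value=[name for name, _ in metrics],
                label="Select metrics to show",
                interactive=True,
            )
        with gr.Column():
            # Right column: one citation line per metric.
            for name, cite in metrics:
                gr.Markdown("**" + name + "**. " + cite)

if __name__ == "__main__":
    demo.launch()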
src/about.py CHANGED
@@ -6,15 +6,17 @@ class Task:
     benchmark: str
     metric: str
     col_name: str
+    # Relevant citation for the task
+    cite: str = ""
 
 
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("toxigen", "acc", "Toxicity")
-    task1 = Task("anli_r1", "acc", "ANLI")
-    task2 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("toxigen", "acc", "Toxicity (lower is better)", cite="_ToxiGen: A Large-Scale Machine-Generated Dataset for Adversarial and Implicit Hate Speech Detection._ Hartvigsen et al., ACL 2022.")
+    task1 = Task("anli_r1", "acc", "ANLI", cite="_Adversarial NLI: A New Benchmark for Natural Language Understanding._ Nie et al., ACL 2020.")
+    task2 = Task("logiqa", "acc_norm", "LogiQA", cite="_LogiQA: A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning_. Liu et al., IJCAI 2020.")
 
 NUM_FEWSHOT = 0 # Change with your few shot MEG NOTE: Not sure what that means.
 # ---------------------------------------------------
@@ -24,7 +26,7 @@ TITLE = """<h1 align="center" id="space-title">Toxicity Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Evaluate the toxicity of open LLMs.
+<h2 align="center">Evaluate the toxicity of open LLMs.</h2>
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
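Because cite is declared with a default, it stays optional: existing three-argument Task(...) calls keep working, and code elsewhere can read the citation off each enum member's value. A rough sketch of that behavior, assuming only the imports shown here:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
    cite: str = ""  # optional, so older Task(...) calls need no change

class Tasks(Enum):
    task0 = Task("toxigen", "acc", "Toxicity (lower is better)",
                 cite="_ToxiGen._ Hartvigsen et al., ACL 2022.")
    task1 = Task("anli_r1", "acc", "ANLI")  # cite falls back to ""

for task in Tasks:
    print(task.value.col_name, "->", task.value.cite)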
src/display/utils.py CHANGED
@@ -17,6 +17,7 @@ class ColumnContent:
     name: str
     type: str
     displayed_by_default: bool
+    cite: str = ""
     hidden: bool = False
     never_hidden: bool = False
     dummy: bool = False
@@ -30,7 +31,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent(name="Model"
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent(name="Average ⬆️", type="number", never_hidden=False, displayed_by_default=False)])
 for task in Tasks:
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(name=task.value.col_name, type="number", displayed_by_default=True)])
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(name=task.value.col_name, cite=task.value.cite, type="number", displayed_by_default=True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent(name="Type", type="str", never_hidden=False, displayed_by_default=False, advanced=True)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", never_hidden=False, displayed_by_default=False, advanced=True)])