tathagataraha committed
Commit 96ca081 · 1 Parent(s): 5c80286

[MODIFY] About

Files changed (2):
  1. app.py +3 -1
  2. src/about.py +4 -10
app.py CHANGED

@@ -11,6 +11,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT_1,
+    LLM_BENCHMARKS_TEXT_2,
     CROSS_EVALUATION_METRICS,
     NOTE_GENERATION_METRICS,
     # EVALUATION_EXAMPLE_IMG,
@@ -939,8 +940,9 @@ with demo:
     with gr.Accordion("Cross Examination", open=False):
         system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="cross_examination")
     with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=5):
-        gr.HTML(FIVE_PILLAR_DIAGRAM)
         gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
+        gr.HTML(FIVE_PILLAR_DIAGRAM)
+        gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
         # gr.HTML(EVALUATION_EXAMPLE_IMG, elem_classes="logo")
         # gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
         # gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
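To see the net effect of the About-tab hunk in one place, here is a minimal, self-contained sketch of the reordered layout after this commit. The `gr.Blocks`/`gr.Tabs` wrapper and the placeholder constant values are assumptions added for illustration; the three render calls mirror the new lines in the diff.

```python
# Minimal sketch, assuming gradio is installed and using placeholder values
# for the constants that live in src/about.py.
import gradio as gr

LLM_BENCHMARKS_TEXT_1 = "## About\nIntro text for the MEDIC Leaderboard."
FIVE_PILLAR_DIAGRAM = (
    '<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/'
    'resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%">'
)
LLM_BENCHMARKS_TEXT_2 = "## Evaluation Categories\nDetails of each category."

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=5):
            # Order after this commit: intro text, then the five-pillar
            # diagram, then the rest of the benchmark description.
            gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
            gr.HTML(FIVE_PILLAR_DIAGRAM)
            gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()
```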
src/about.py CHANGED

@@ -104,7 +104,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 TITLE = """""" #<h1 align="center" id="space-title"> NER Leaderboard</h1>"""
 # LOGO = """<img src="https://equalengineers.com/wp-content/uploads/2024/04/dummy-logo-5b.png" alt="Clinical X HF" width="500" height="333">"""
 LOGO = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/image.png" alt="Clinical X HF" width="40%" style="display: block; margin-left: auto; margin-right: auto;">"""
-FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="65%" style="display: block; margin-left: auto; margin-right: auto;">"""
+FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%" style="display: block; margin-left: auto; margin-right: auto;">"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
@@ -117,16 +117,10 @@ Disclaimer: It is important to note that the purpose of this evaluation is purely
 LLM_BENCHMARKS_TEXT_1 = f"""
 
 ## About
-The MEDIC Leaderboard provides a comprehensive evaluation of large language models (LLMs) on various healthcare tasks. It assesses the performance of different LLMs across five key dimensions:
-
-- Medical Reasoning
-- Ethics and Bias Concerns
-- Data and Language Understanding
-- In-Context Learning
-- Clinical Safety and Risk Assessment
-
-By evaluating these dimensions, MEDIC aims to measure how effective and safe LLMs would be when used in real healthcare settings.
+The MEDIC Leaderboard evaluates large language models (LLMs) on various healthcare tasks across five key dimensions. Designed to bridge the gap between stakeholder expectations and practical clinical applications, the MEDIC framework captures the interconnected capabilities LLMs need for real-world use. Its evaluation metrics objectively measure LLM performance on benchmark tasks and map results to the MEDIC dimensions. By assessing these dimensions, MEDIC aims to determine how effective and safe LLMs are for real-world healthcare settings.
+"""
 
+LLM_BENCHMARKS_TEXT_2 = """
 ## Evaluation Categories
 ### Close-ended Questions