Commit 96ca081 · committed by tathagataraha · 1 parent: 5c80286
[MODIFY] About
Files changed:
- app.py (+3, -1)
- src/about.py (+4, -10)
app.py CHANGED

@@ -11,6 +11,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT_1,
+    LLM_BENCHMARKS_TEXT_2,
     CROSS_EVALUATION_METRICS,
     NOTE_GENERATION_METRICS,
     # EVALUATION_EXAMPLE_IMG,
@@ -939,8 +940,9 @@ with demo:
             with gr.Accordion("Cross Examination", open=False):
                 system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="cross_examination")
         with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=5):
-            gr.HTML(FIVE_PILLAR_DIAGRAM)
             gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
+            gr.HTML(FIVE_PILLAR_DIAGRAM)
+            gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
             # gr.HTML(EVALUATION_EXAMPLE_IMG, elem_classes="logo")
             # gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
             # gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
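For readers following the layout change, here is a minimal, self-contained sketch of the reordered About tab. It is not the Space's actual app.py: it assumes Gradio Blocks (as app.py already uses) and substitutes illustrative placeholder strings for the constants imported from src/about.py; only the Markdown -> HTML -> Markdown rendering order is the point.

# Minimal sketch of the new About-tab ordering (assumes Gradio Blocks as in app.py).
# The three constants below are illustrative placeholders for the real values
# defined in src/about.py; only the rendering order matters here.
import gradio as gr

LLM_BENCHMARKS_TEXT_1 = "## About\nIntro text rendered above the diagram."
FIVE_PILLAR_DIAGRAM = '<img src="assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%">'
LLM_BENCHMARKS_TEXT_2 = "## Evaluation Categories\nDetails rendered below the diagram."

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=5):
            gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
            gr.HTML(FIVE_PILLAR_DIAGRAM)  # diagram now sits between the two text blocks
            gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")

demo.launch()

Splitting the About text into two constants is what lets the diagram land between the intro and the Evaluation Categories section instead of sitting above all of the text.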
src/about.py CHANGED

@@ -104,7 +104,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 TITLE = """""" #<h1 align="center" id="space-title"> NER Leaderboard</h1>"""
 # LOGO = """<img src="https://equalengineers.com/wp-content/uploads/2024/04/dummy-logo-5b.png" alt="Clinical X HF" width="500" height="333">"""
 LOGO = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/image.png" alt="Clinical X HF" width="40%" style="display: block; margin-left: auto; margin-right: auto;">"""
-FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="
+FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%" style="display: block; margin-left: auto; margin-right: auto;">"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
@@ -117,16 +117,10 @@ Disclaimer: It is important to note that the purpose of this evaluation is purel
 LLM_BENCHMARKS_TEXT_1 = f"""
 
 ## About
-The MEDIC Leaderboard
-
-- Medical Reasoning
-- Ethics and Bias Concerns
-- Data and Language Understanding
-- In-Context Learning
-- Clinical Safety and Risk Assessment
-
-By evaluating these dimensions, MEDIC aims to measure how effective and safe LLMs would be when used in real healthcare settings.
+The MEDIC Leaderboard evaluates large language models (LLMs) on various healthcare tasks across five key dimensions. Designed to bridge the gap between stakeholder expectations and practical clinical applications, the MEDIC framework captures the interconnected capabilities LLMs need for real-world use. Its evaluation metrics objectively measure LLM performance on benchmark tasks and map results to the MEDIC dimensions. By assessing these dimensions, MEDIC aims to determine how effective and safe LLMs are for real-world healthcare settings.
+"""
 
+LLM_BENCHMARKS_TEXT_2 = """
 ## Evaluation Categories
 ### Close-ended Questions
 
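If desired, a small sanity check can confirm the split constants line up with what app.py now renders. This is only a suggested snippet, assuming it is run from the Space's repository root where src/about.py lives:

# Suggested consistency check for the refactor above (run from the repo root).
# It asserts only structure introduced by this commit, nothing more.
from src.about import FIVE_PILLAR_DIAGRAM, LLM_BENCHMARKS_TEXT_1, LLM_BENCHMARKS_TEXT_2

assert FIVE_PILLAR_DIAGRAM.lstrip().startswith("<img"), "diagram constant should be an <img> tag"
assert "## About" in LLM_BENCHMARKS_TEXT_1, "first text block should carry the About section"
assert "## Evaluation Categories" in LLM_BENCHMARKS_TEXT_2, "second text block should open the categories section"
print("src/about.py constants match the layout expected by app.py")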