Commit 96ca081 · committed by tathagataraha · 1 parent: 5c80286
[MODIFY] About
Files changed:
- app.py (+3, -1)
- src/about.py (+4, -10)
app.py CHANGED

@@ -11,6 +11,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT_1,
+    LLM_BENCHMARKS_TEXT_2,
     CROSS_EVALUATION_METRICS,
     NOTE_GENERATION_METRICS,
     # EVALUATION_EXAMPLE_IMG,
@@ -939,8 +940,9 @@ with demo:
             with gr.Accordion("Cross Examination", open=False):
                 system_prompt, user_prompt = render_generation_templates(task="ce", generation_type="cross_examination")
         with gr.TabItem("π About", elem_id="llm-benchmark-tab-table", id=5):
-            gr.HTML(FIVE_PILLAR_DIAGRAM)
             gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
+            gr.HTML(FIVE_PILLAR_DIAGRAM)
+            gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
             # gr.HTML(EVALUATION_EXAMPLE_IMG, elem_classes="logo")
             # gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")
             # gr.HTML(ENTITY_DISTRIBUTION_IMG, elem_classes="logo")
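For readers following the layout change, here is a minimal, self-contained sketch of the reordered About tab. It is not the Space's actual app.py: it assumes Gradio Blocks (as app.py already uses) and substitutes illustrative placeholder strings for the constants imported from src/about.py; only the Markdown -> HTML -> Markdown rendering order is the point.

# Minimal sketch of the new About-tab ordering (assumes Gradio Blocks as in app.py).
# The three constants below are illustrative placeholders for the real values
# defined in src/about.py; only the rendering order matters here.
import gradio as gr

LLM_BENCHMARKS_TEXT_1 = "## About\nIntro text rendered above the diagram."
FIVE_PILLAR_DIAGRAM = '<img src="assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%">'
LLM_BENCHMARKS_TEXT_2 = "## Evaluation Categories\nDetails rendered below the diagram."

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=5):
            gr.Markdown(LLM_BENCHMARKS_TEXT_1, elem_classes="markdown-text")
            gr.HTML(FIVE_PILLAR_DIAGRAM)  # diagram now sits between the two text blocks
            gr.Markdown(LLM_BENCHMARKS_TEXT_2, elem_classes="markdown-text")

demo.launch()

Splitting the About text into two constants is what lets the diagram land between the intro and the Evaluation Categories section instead of sitting above all of the text.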
src/about.py CHANGED

@@ -104,7 +104,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 TITLE = """""" #<h1 align="center" id="space-title"> NER Leaderboard</h1>"""
 # LOGO = """<img src="https://equalengineers.com/wp-content/uploads/2024/04/dummy-logo-5b.png" alt="Clinical X HF" width="500" height="333">"""
 LOGO = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/image.png" alt="Clinical X HF" width="40%" style="display: block; margin-left: auto; margin-right: auto;">"""
-FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="
+FIVE_PILLAR_DIAGRAM = """<img src="https://huggingface.co/spaces/m42-health/MEDIC-Benchmark/resolve/main/assets/MEDIC_Diagram.jpg" alt="MEDIC Diagram" width="52%" style="display: block; margin-left: auto; margin-right: auto;">"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
@@ -117,16 +117,10 @@ Disclaimer: It is important to note that the purpose of this evaluation is purel
 LLM_BENCHMARKS_TEXT_1 = f"""
 
 ## About
-The MEDIC Leaderboard
-
-- Medical Reasoning
-- Ethics and Bias Concerns
-- Data and Language Understanding
-- In-Context Learning
-- Clinical Safety and Risk Assessment
-
-By evaluating these dimensions, MEDIC aims to measure how effective and safe LLMs would be when used in real healthcare settings.
+The MEDIC Leaderboard evaluates large language models (LLMs) on various healthcare tasks across five key dimensions. Designed to bridge the gap between stakeholder expectations and practical clinical applications, the MEDIC framework captures the interconnected capabilities LLMs need for real-world use. Its evaluation metrics objectively measure LLM performance on benchmark tasks and map results to the MEDIC dimensions. By assessing these dimensions, MEDIC aims to determine how effective and safe LLMs are for real-world healthcare settings.
+"""
 
+LLM_BENCHMARKS_TEXT_2 = """
 ## Evaluation Categories
 ### Close-ended Questions
 
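If desired, a small sanity check can confirm the split constants line up with what app.py now renders. This is only a suggested snippet, assuming it is run from the Space's repository root where src/about.py lives:

# Suggested consistency check for the refactor above (run from the repo root).
# It asserts only structure introduced by this commit, nothing more.
from src.about import FIVE_PILLAR_DIAGRAM, LLM_BENCHMARKS_TEXT_1, LLM_BENCHMARKS_TEXT_2

assert FIVE_PILLAR_DIAGRAM.lstrip().startswith("<img"), "diagram constant should be an <img> tag"
assert "## About" in LLM_BENCHMARKS_TEXT_1, "first text block should carry the About section"
assert "## Evaluation Categories" in LLM_BENCHMARKS_TEXT_2, "second text block should open the categories section"
print("src/about.py constants match the layout expected by app.py")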