File size: 10,441 Bytes
1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c c6ce8a0 1e16b4c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
from typing import List
import gradio as gr
import numpy as np
import pandas as pd
# Raw benchmark results, read once at import time from a path relative to the
# current working directory. The functions in this file read its "Model" and
# "Dataset" columns plus one column per entry of _METRICS; each metric cell is
# parsed as a bracketed, comma-separated list of numbers.
_ORIGINAL_DF = pd.read_csv("./data/benchmark.csv")
# Metric column names of the CSV; also the choices offered by the metric dropdown.
_METRICS = ["MCC", "F1", "ACC"]
# Names of NumPy reductions (resolved with getattr(np, name)) used to collapse
# the list of values stored in a metric cell into a single score.
_AGGREGATION_METHODS = ["mean", "max", "min", "median"]
# Downstream task names grouped by category. Each group feeds one checkbox
# group in the UI, and the selected names are used to pick "Dataset" values.
_TASKS = {
"histone_marks": [
"H4",
"H3",
"H3K14ac",
"H3K4me1",
"H3K4me3",
"H3K4me2",
"H3K36me3",
"H4ac",
"H3K79me3",
"H3K9ac",
],
"regulatory_elements": [
"promoter_no_tata",
"enhancers",
"enhancers_types",
"promoter_all",
"promoter_tata",
],
"RNA_production": [
"splice_sites_donors",
"splice_sites_all",
"splice_sites_acceptors",
],
}
# BibTeX entry displayed in the copyable "Citation" textbox of the UI.
_BIBTEX = """@article{DallaTorre2023TheNT,
title={The Nucleotide Transformer: Building and Evaluating Robust Foundation Models for Human Genomics},
author={Hugo Dalla-Torre and Liam Gonzalez and Javier Mendoza Revilla and Nicolas Lopez Carranza and Adam Henryk Grzywaczewski and Francesco Oteri and Christian Dallago and Evan Trop and Hassan Sirelkhatim and Guillaume Richard and Marcin J. Skwark and Karim Beguir and Marie Lopez and Thomas Pierrot},
journal={bioRxiv},
year={2023},
url={https://api.semanticscholar.org/CorpusID:255943445}
}
""" # noqa
# Date string interpolated into the "Last updated on ..." footer.
_LAST_UPDATED = "Sept 15, 2023"
# Relative path of the logo image; passed to gr.Image in the page header.
banner_url = "./assets/logo.png"
# HTML wrapper around the logo. NOTE(review): not referenced by any code
# visible in this file (the header uses gr.Image directly) - confirm whether
# it is still needed.
_BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>' # noqa
_INTRODUCTION_TEXT = """The π€ Nucleotide Transformer Leaderboard aims to track, rank and evaluate DNA foundational models on a set of curated downstream tasks introduced in the huggingface dataset [nucleotide_transformer_downstream_tasks](https://huggingface.co/datasets/InstaDeepAI/nucleotide_transformer_downstream_tasks), with a standardized evaluation protocol presented in the "βΉοΈ Methods" tab.\n\n
This leaderboard has been designed to provide, to the best of our ability, fair and robust comparisons between models. If you have any question or concern regarding our methodology or if you would like another model to appear in this leaderboard, please reach out to m.lopez@instadeep.com and t.pierrot@instadeep.com. While we may not be able to take into consideration all requests, the team will always do its best to ensure that benchmark stays as fair, relevant and up-to-date as possible.\n\n
""" # noqa
_METHODS_TEXT = """
This leaderboard uses the downstream tasks benchmark and evaluation methdology described in the Nucleotide Transformer paper. We fine-tune each model on each task using a ten-fold validation strategy. For each model and each task, we report the aggregation over the ten-folds for several metrics - the Matthew Correlation Coefficient (MCC), the macro f1-score (F1) and the accuracy (ACC). The Nucleotide Transformer, DNABert and Enformer models have been fine-tuned using the same parameter efficient fine-tuning technique (IA3) with the same set of hyper-parameters. Due to the different nature of their architecture, the HyenaDNA models have been fully-finetuned using the original code provided by the authors.
\n\n
Please keep in mind that the Enformer has been originally trained in a supervised fashion to solve gene expression tasks. For the sake of benchmarking, we re-used the provided model torso as a pre-trained model for our benchmark, which is not the intended and recommended use of the original paper. Though we think this comparison is interesting to highlight the differences between self-supervised and supervised learning for pre-training and observe that the Enformer is a very competitive baseline even for tasks that differ from gene expression.
\n\n
For the sake of clarity the tasks being shown by default in this leaderboard are the human related tasks while the original Nucleotide Transformer paper shows performance over both yeast and human related tasks. To obtain the same results as the one shown in the paper, please check all the tasks boxes above.
\n\n
""" # noqa
def retrieve_array_from_text(text):
    """Turn a string such as ``"[0.1, 0.2]"`` into a 1-D float NumPy array.

    Every ``[`` and ``]`` character is removed, then the remainder is parsed
    as comma-separated numbers.
    """
    without_brackets = text.translate({ord("["): None, ord("]"): None})
    return np.fromstring(without_brackets, dtype=float, sep=",")
def format_number(x):
    """Return ``x`` rounded to three significant digits, as a float.

    The value is rendered with the ``.3`` format spec and parsed back, so
    large values lose their low-order digits (``1234.5`` becomes ``1230.0``).
    """
    rendered = format(x, ".3")
    return float(rendered)
def get_dataset(
    histone_tasks: List[str],
    regulatory_tasks: List[str],
    rna_tasks: List[str],
    target_metric: str = "MCC",
    aggregation_method: str = "mean",
):
    """Build the leaderboard table for the selected tasks.

    Args:
        histone_tasks: Selected task names from the histone group.
        regulatory_tasks: Selected task names from the regulatory group.
        rna_tasks: Selected task names from the RNA production group.
        target_metric: Name of the metric column of the benchmark data to rank on.
        aggregation_method: Name of the NumPy function (e.g. ``"mean"``) used
            to reduce the list of values stored in each metric cell.

    Returns:
        A non-interactive ``gr.components.Dataframe`` with one row per model,
        one column per selected task plus an "All Tasks" column (row-wise mean
        of the selected tasks), sorted by "All Tasks" in descending order.
        All scores are rounded to three significant digits.

    Raises:
        AttributeError: If ``aggregation_method`` is not an attribute of NumPy.
        KeyError: If ``target_metric`` or a task name is missing from the data.
    """
    tasks = histone_tasks + regulatory_tasks + rna_tasks
    aggr_fn = getattr(np, aggregation_method)

    # One aggregated, rounded score per (model, dataset) row of the raw data.
    scores = (
        _ORIGINAL_DF[target_metric]
        .apply(retrieve_array_from_text)
        .apply(aggr_fn)
        .apply(format_number)
    )

    df = _ORIGINAL_DF.drop(columns=_METRICS)
    df["Score"] = scores
    df = df.pivot(index="Model", columns="Dataset", values="Score")

    # Take an explicit copy of the column subset: adding a column to the bare
    # result of ``df[tasks]`` is a chained assignment, which pandas reports
    # with a SettingWithCopyWarning.
    df = df[tasks].copy()
    df["All Tasks"] = df.agg("mean", axis="columns").apply(format_number)

    # Alphabetical column order; the pivot index is already named "Model", so
    # reset_index() yields the "Model" column without any further renaming.
    df = df[sorted(df.columns)]
    df = df.reset_index()
    df = df.sort_values(by=["All Tasks"], ascending=False)

    return gr.components.Dataframe(
        value=df,
        interactive=False,
        visible=True,
    )
def get_bar_plot(
    histone_tasks: List[str],
    regulatory_tasks: List[str],
    rna_tasks: List[str],
    target_metric: str = "MCC",
    aggregation_method: str = "mean",
):
    """Build the per-model bar plot for the selected tasks.

    Args:
        histone_tasks: Selected task names from the histone group.
        regulatory_tasks: Selected task names from the regulatory group.
        rna_tasks: Selected task names from the RNA production group.
        target_metric: Name of the metric column of the benchmark data to plot.
        aggregation_method: Name of the NumPy function (e.g. ``"mean"``) used
            to reduce the list of values stored in each metric cell.

    Returns:
        A ``gr.BarPlot`` with models on the x axis, the scaled score on the
        y axis and one colour per selected task.

    Raises:
        AttributeError: If ``aggregation_method`` is not an attribute of NumPy.
        KeyError: If ``target_metric`` is not a column of the benchmark data.
    """
    tasks = histone_tasks + regulatory_tasks + rna_tasks
    aggr_fn = getattr(np, aggregation_method)

    scores = (
        _ORIGINAL_DF[target_metric]
        .apply(retrieve_array_from_text)
        .apply(aggr_fn)
        .apply(format_number)
    )

    df = _ORIGINAL_DF.drop(columns=_METRICS)
    # Each score is divided by the number of selected tasks - presumably so
    # that a model's per-task segments add up to its mean score (confirm
    # against the rendered plot). max(..., 1) avoids dividing by zero when
    # nothing is selected; the filter below then leaves an empty frame.
    df["Score"] = scores / max(len(tasks), 1)

    # Membership test instead of splicing the list into a query string: it
    # does not depend on how task names are quoted and copes with an empty
    # selection.
    df = df[df["Dataset"].isin(tasks)]

    return gr.BarPlot(
        df,
        x="Model",
        y="Score",
        color="Dataset",
        width=500,
        x_label_angle=-45,
        x_title="Model",
        y_title="Score",
        color_legend_title="Downstream Task",
    )
# Page layout and event wiring.
# NOTE(review): the nesting below follows the usual Gradio layout conventions
# (components directly under their Row/Tab/Accordion) - confirm against the
# deployed app.
with gr.Blocks() as demo:
    # Header: logo next to the introduction text.
    with gr.Row():
        gr.Image(banner_url, height=160, scale=1)
        gr.Markdown(_INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Metric and fold-aggregation selectors.
    with gr.Row():
        metric_choice = gr.Dropdown(
            choices=_METRICS,
            value="MCC",
            label="Metric displayed.",
        )
        aggr_choice = gr.Dropdown(
            choices=_AGGREGATION_METHODS,
            value="mean",
            label="Aggregation used over 10-folds.",
        )

    # Task selectors. The two human-data groups start fully checked; the
    # histone group is given no initial value.
    with gr.Row():
        regulatory_tasks = gr.CheckboxGroup(
            choices=_TASKS["regulatory_elements"],
            value=_TASKS["regulatory_elements"],
            label="Regulatory Elements Downstream Tasks.",
            info="Human data.",
            scale=3,
        )
        rna_tasks = gr.CheckboxGroup(
            choices=_TASKS["RNA_production"],
            value=_TASKS["RNA_production"],
            label="RNA Production Downstream Tasks.",
            info="Human data.",
            scale=3,
        )
        histone_tasks = gr.CheckboxGroup(
            choices=_TASKS["histone_marks"],
            label="Histone Modification Downstream Tasks.",
            info="Yeast data.",
            scale=4,
        )

    # NOTE(review): all three tabs share the same elem_id; kept unchanged in
    # case external CSS targets it.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            dataframe = gr.components.Dataframe(
                elem_id="leaderboard-table",
            )

        # NOTE(review): the emoji in this label is a best guess - confirm glyph.
        with gr.TabItem("📈 Graph", elem_id="od-benchmark-tab-table", id=2):
            # Placeholder; replaced by the plot returned from get_bar_plot,
            # which uses the "Model" column on the x axis.
            bar_plot = gr.BarPlot(
                elem_id="leaderboard-bar-plot",
                x="Model",
                y="Score",
            )

        with gr.TabItem("ℹ️ Methods", elem_id="od-benchmark-tab-table", id=1):
            gr.Markdown(_METHODS_TEXT, elem_classes="markdown-text")

    gr.Markdown(f"Last updated on **{_LAST_UPDATED}**", elem_classes="markdown-text")

    with gr.Row():
        # NOTE(review): the emoji in this label is a best guess - confirm glyph.
        with gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=_BIBTEX,
                lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Every control refreshes both views, and both are also computed once on
    # page load. Registration order: all table listeners, then all plot
    # listeners.
    control_inputs = [histone_tasks, regulatory_tasks, rna_tasks, metric_choice, aggr_choice]
    for refresh_fn, output_component in ((get_dataset, dataframe), (get_bar_plot, bar_plot)):
        for control in control_inputs:
            control.change(
                refresh_fn,
                inputs=control_inputs,
                outputs=output_component,
            )
        demo.load(
            fn=refresh_fn,
            inputs=control_inputs,
            outputs=output_component,
        )

demo.launch()
|