loubnabnl's picture
loubnabnl HF staff
add codefuse to the leaderboard
fb3a362
raw
history blame
6.2 kB
# source: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/blob/main/src/utils_display.py
from dataclasses import dataclass
import plotly.graph_objects as go
from transformers import AutoConfig
# These classes hold the user-facing column names, so that a rename only has
# to be made here rather than everywhere the names are used in the code.
@dataclass
class ColumnContent:
    """Describe one leaderboard column as presented to the user."""

    # Header text of the column.
    name: str
    # Type tag of the column, given as a string (e.g. "str", "number", "markdown").
    type: str
    # Whether the column is part of the default selection.
    displayed_by_default: bool
    # Optional flag, off unless explicitly set.
    hidden: bool = False
def fields(raw_class):
    """Return the values of the non-dunder attributes defined on ``raw_class``.

    Only the class's own ``__dict__`` is inspected, in definition order. Any
    attribute whose name starts with ``__`` or ends with ``__`` is skipped.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
@dataclass(frozen=True)
class AutoEvalColumn:
    """Catalogue of the leaderboard columns, one ``ColumnContent`` per attribute.

    None of the attributes is annotated, so they are plain class attributes
    rather than dataclass fields. Definition order is kept as-is because
    anything that walks the class ``__dict__`` sees the columns in this order.
    """

    model_type_symbol = ColumnContent(name="T", type="str", displayed_by_default=True)
    model = ColumnContent(name="Models", type="markdown", displayed_by_default=True)
    win_rate = ColumnContent(name="Win Rate", type="number", displayed_by_default=True)
    average = ColumnContent(name="Average score", type="number", displayed_by_default=False)

    # Per-language scores and throughput displayed by default.
    humaneval_python = ColumnContent(
        name="humaneval-python", type="number", displayed_by_default=True
    )
    java = ColumnContent(name="java", type="number", displayed_by_default=True)
    javascript = ColumnContent(name="javascript", type="number", displayed_by_default=True)
    throughput = ColumnContent(
        name="Throughput (tokens/s)", type="number", displayed_by_default=True
    )

    # Per-language scores not displayed by default.
    cpp = ColumnContent(name="cpp", type="number", displayed_by_default=False)
    php = ColumnContent(name="php", type="number", displayed_by_default=False)
    rust = ColumnContent(name="rust", type="number", displayed_by_default=False)
    swift = ColumnContent(name="swift", type="number", displayed_by_default=False)
    r = ColumnContent(name="r", type="number", displayed_by_default=False)
    lua = ColumnContent(name="lua", type="number", displayed_by_default=False)
    d = ColumnContent(name="d", type="number", displayed_by_default=False)
    racket = ColumnContent(name="racket", type="number", displayed_by_default=False)
    julia = ColumnContent(name="julia", type="number", displayed_by_default=False)

    # Remaining metadata columns.
    languages = ColumnContent(name="#Languages", type="number", displayed_by_default=False)
    throughput_bs50 = ColumnContent(
        name="Throughput (tokens/s) bs=50", type="number", displayed_by_default=False
    )
    peak_memory = ColumnContent(
        name="Peak Memory (MB)", type="number", displayed_by_default=False
    )
    seq_length = ColumnContent(name="Seq_length", type="number", displayed_by_default=False)
    link = ColumnContent(name="Links", type="str", displayed_by_default=False)
    # Same "Models" header as `model`, but typed as a plain string.
    dummy = ColumnContent(name="Models", type="str", displayed_by_default=True)
    pr = ColumnContent(name="Submission PR", type="str", displayed_by_default=False)
def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Both values are interpolated into the markup as-is (no escaping).
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def make_clickable_names(df):
    """Replace each entry of the "Models" column with a hyperlink to its "Links" entry.

    The frame is modified in place and also returned.
    """

    def _linked_name(row):
        # Wrap this row's model name in an anchor pointing at the row's link.
        return model_hyperlink(row["Links"], row["Models"])

    df["Models"] = df.apply(_linked_name, axis=1)
    return df
# (pattern, color) pairs used by `plot_throughput`. Patterns are matched as
# regular expressions against the "Models" column and applied in order, so when
# several patterns match the same model the LAST one decides the color.
_MODEL_FAMILY_COLORS = (
    ("StarCoder|SantaCoder", "orange"),
    ("CodeGen", "pink"),
    ("Replit", "purple"),
    ("WizardCoder", "peru"),
    ("CodeGeex", "cornflowerblue"),
    ("StableCode", "cadetblue"),
    ("OctoCoder", "lime"),
    ("OctoGeeX", "wheat"),
    ("Deci", "salmon"),
    ("CodeLlama", "palevioletred"),
    ("CodeGuru", "burlywood"),
    # "Phi" is a substring of "Phind": it has to be applied BEFORE "Phind",
    # otherwise every Phind model would end up with the Phi color.
    ("Phi", "gray"),
    ("Phind", "crimson"),
    ("Falcon", "dimgray"),
    ("Refact", "yellow"),
    ("CodeShell", "lightskyblue"),
    ("DeepSeek", "lightgreen"),
    ("CodeFuse", "olive"),
)


def plot_throughput(df, bs=1):
    """Build a scatter plot of average score against throughput, one trace per model.

    Args:
        df: DataFrame with the columns "Models", "Average score", "Size (B)",
            "Peak Memory (MB)", "humaneval-python" and the throughput column
            selected by ``bs``. The helper columns "symbol" and "color" are
            added to (or overwritten in) this frame in place.
        bs: Batch size. ``1`` reads "Throughput (tokens/s)"; any other value
            reads "Throughput (tokens/s) bs=50".

    Returns:
        The ``plotly.graph_objects.Figure`` holding one marker trace per row.
    """
    throughput_column = (
        "Throughput (tokens/s)" if bs == 1 else "Throughput (tokens/s) bs=50"
    )

    # NOTE(review): plotly's numeric marker symbol 2 is "diamond", not a triangle.
    df["symbol"] = 2
    df["color"] = ""  # Models matching no family pattern keep an empty color.
    for pattern, color in _MODEL_FAMILY_COLORS:
        df.loc[df["Models"].str.contains(pattern), "color"] = color

    fig = go.Figure()

    # One trace per model so that each model gets its own legend entry.
    for i in df.index:
        fig.add_trace(
            go.Scatter(
                x=[df.loc[i, throughput_column]],
                y=[df.loc[i, "Average score"]],
                mode="markers",
                marker=dict(
                    # Marker size is the model size plus a constant offset of 10.
                    size=[df.loc[i, "Size (B)"] + 10],
                    color=df.loc[i, "color"],
                    symbol=df.loc[i, "symbol"],
                ),
                name=df.loc[i, "Models"],
                # %{text}, %{x} and %{y} are filled in by plotly; the peak
                # memory and HumanEval values are baked into the template.
                hovertemplate="<b>%{text}</b><br><br>"
                + f"{throughput_column}: %{{x}}<br>"
                + "Average Score: %{y}<br>"
                + "Peak Memory (MB): "
                + str(df.loc[i, "Peak Memory (MB)"])
                + "<br>"
                + "Human Eval (Python): "
                + str(df.loc[i, "humaneval-python"]),
                text=[df.loc[i, "Models"]],
                showlegend=True,
            )
        )

    fig.update_layout(
        autosize=False,
        width=650,
        height=600,
        title=f"Average Score Vs Throughput (A100-80GB, Float16, Batch Size <b>{bs}</b>)",
        xaxis_title=f"{throughput_column}",
        yaxis_title="Average Code Score",
    )
    return fig
def styled_error(error):
    """Wrap ``error`` in a centered, red, 20px HTML paragraph."""
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{error}</p>"
def styled_warning(warn):
    """Wrap ``warn`` in a centered, orange, 20px HTML paragraph."""
    css = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{warn}</p>"
def styled_message(message):
    """Wrap ``message`` in a centered, green, 20px HTML paragraph."""
    css = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{message}</p>"
def has_no_nan_values(df, columns):
    """Return a per-row boolean mask: True where none of ``columns`` is missing."""
    selected = df[columns]
    is_present = selected.notna()
    return is_present.all(axis=1)
def has_nan_values(df, columns):
    """Return a per-row boolean mask: True where at least one of ``columns`` is missing."""
    selected = df[columns]
    is_missing = selected.isna()
    return is_missing.any(axis=1)
def is_model_on_hub(model_name: str, revision: str) -> "tuple[bool, str | None]":
    """Check whether a model config can be loaded without remote code.

    Args:
        model_name: Model identifier passed to ``AutoConfig.from_pretrained``.
        revision: Revision passed to ``AutoConfig.from_pretrained``.

    Returns:
        A ``(found, message)`` pair. ``(True, None)`` when the config loads.
        Otherwise ``(False, reason)``, where ``reason`` is a sentence fragment
        explaining the failure.
    """
    try:
        AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=False)
    except ValueError:
        # A ValueError is reported as the model requiring remote code.
        return (
            False,
            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
        )
    except Exception as e:
        # Deliberately broad: any other failure is reported as "not found".
        print(f"Could not get the model config from the hub.: {e}")
        return False, "was not found on hub!"
    return True, None