File size: 4,398 Bytes
3ebc784
5b15f5e
3ebc784
5b15f5e
3ebc784
 
5b15f5e
 
 
3ebc784
 
5b15f5e
 
3ebc784
5b15f5e
3ebc784
 
 
 
5b15f5e
 
3ebc784
5b15f5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3ebc784
5b15f5e
 
 
 
 
 
 
 
 
3ebc784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13ff3a0
a2b0b51
b9d3833
2f02c91
 
4a498d2
13ff3a0
3ebc784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# source: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/blob/main/src/utils_display.py
from dataclasses import dataclass
import plotly.graph_objects as go

# These classes hold the user-facing column names, so that a rename only has
# to be made here rather than everywhere the names are used in the code.
@dataclass
class ColumnContent:
    """Describe one leaderboard column by its user-facing name and display settings.

    Instances are plain (mutable) dataclass records. The fields are positional
    in the order declared below; only ``hidden`` has a default value.
    """

    # User-facing column header (e.g. "Models", "Win Rate").
    name: str
    # Column datatype label; the values used in this file are "str",
    # "markdown" and "number".
    type: str
    # Presumably whether the column is shown without the user opting in --
    # TODO confirm against the UI code that consumes this flag.
    displayed_by_default: bool
    # Presumably marks columns that should never be shown -- TODO confirm
    # against the UI code that consumes this flag.
    hidden: bool = False


def fields(raw_class):
    """Collect the non-dunder attribute values declared directly on a class.

    Walks ``raw_class.__dict__`` in its stored order and keeps the value of
    every entry whose name neither starts nor ends with a double underscore.
    Attributes inherited from base classes are not part of ``__dict__`` and
    are therefore not returned.

    Args:
        raw_class: Any object exposing a ``__dict__`` mapping (typically a class).

    Returns:
        A list of the retained attribute values.
    """
    public_values = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that looks like a dunder on either side.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        public_values.append(attr_value)
    return public_values


@dataclass(frozen=True)
class AutoEvalColumn:
    """Registry of the leaderboard columns, one ``ColumnContent`` per attribute.

    The attributes are deliberately left unannotated: they are ordinary class
    attributes rather than dataclass fields, and their declaration order is
    the order in which a walk over the class ``__dict__`` yields them.
    """

    # Model identity and headline metrics.
    model_type_symbol = ColumnContent(name="T", type="str", displayed_by_default=True)
    model = ColumnContent(name="Models", type="markdown", displayed_by_default=True)
    win_rate = ColumnContent(name="Win Rate", type="number", displayed_by_default=True)
    throughput = ColumnContent(
        name="Throughput (tokens/s)", type="number", displayed_by_default=True
    )
    languages = ColumnContent(
        name="#Languages", type="number", displayed_by_default=True
    )

    # Per-language scores displayed by default.
    humaneval_python = ColumnContent(
        name="humaneval-python", type="number", displayed_by_default=True
    )
    java = ColumnContent(name="java", type="number", displayed_by_default=True)
    javascript = ColumnContent(
        name="javascript", type="number", displayed_by_default=True
    )
    cpp = ColumnContent(name="cpp", type="number", displayed_by_default=True)
    php = ColumnContent(name="php", type="number", displayed_by_default=True)
    rust = ColumnContent(name="rust", type="number", displayed_by_default=True)
    swift = ColumnContent(name="swift", type="number", displayed_by_default=True)
    r = ColumnContent(name="r", type="number", displayed_by_default=True)

    # Per-language scores not displayed by default.
    lua = ColumnContent(name="lua", type="number", displayed_by_default=False)
    d = ColumnContent(name="d", type="number", displayed_by_default=False)
    racket = ColumnContent(name="racket", type="number", displayed_by_default=False)
    julia = ColumnContent(name="julia", type="number", displayed_by_default=False)

    # Extra measurements and bookkeeping columns, not displayed by default.
    throughput_bs50 = ColumnContent(
        name="Throughput (tokens/s) bs=50", type="number", displayed_by_default=False
    )
    peak_memory = ColumnContent(
        name="Peak Memory (MB)", type="number", displayed_by_default=False
    )
    seq_length = ColumnContent(
        name="Seq_length", type="number", displayed_by_default=False
    )
    average = ColumnContent(
        name="Average score", type="number", displayed_by_default=False
    )
    link = ColumnContent(name="Links", type="str", displayed_by_default=False)
    # Shares the "Models" header with ``model`` but is typed as plain "str".
    dummy = ColumnContent(name="Models", type="str", displayed_by_default=False)


def model_hyperlink(link, model_name):
    """Render an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Both arguments are interpolated into the markup verbatim; no HTML escaping
    is applied.

    Args:
        link: Value placed in the anchor's ``href`` attribute.
        model_name: Text shown as the anchor's content.

    Returns:
        The ``<a ...>...</a>`` markup as a string.
    """
    anchor_style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'


def make_clickable_names(df):
    """Replace the "Models" column with HTML anchors built from "Links".

    The frame is modified in place and also returned.

    Args:
        df: DataFrame providing "Links" and "Models" columns.

    Returns:
        The same ``df`` object, with "Models" overwritten by anchor markup.
    """

    def _row_to_anchor(row):
        # One anchor per row: href from "Links", label from "Models".
        return model_hyperlink(row["Links"], row["Models"])

    df["Models"] = df.apply(_row_to_anchor, axis=1)
    return df


# (regex pattern, marker colour) pairs, applied in order to the "Models"
# column. A name matching several patterns keeps the colour of the last
# matching entry; a name matching none keeps the empty-string default.
_MODEL_FAMILY_COLORS = (
    ("StarCoder|SantaCoder", "orange"),
    ("CodeGen", "pink"),
    ("Replit", "purple"),
    ("WizardCoder", "peru"),
    ("CodeGeex", "cornflowerblue"),
    ("StableCode", "cadetblue"),
    ("OctoCoder", "lime"),
    ("OctoGeeX", "wheat"),
    ("Deci", "salmon"),
    ("CodeLlama", "palevioletred"),
)


def plot_throughput(df, bs=1):
    """Build a scatter plot of average score against throughput, one trace per row.

    The input frame is left untouched: the helper "symbol" and "color" columns
    are added to a private copy only.

    Args:
        df: DataFrame providing the columns "Models", "Average score",
            "Size (B)", "Peak Memory (MB)", "humaneval-python" and the
            selected throughput column.
        bs: Batch size shown in the title. ``1`` selects the
            "Throughput (tokens/s)" column; any other value selects
            "Throughput (tokens/s) bs=50".

    Returns:
        A ``plotly.graph_objects.Figure`` of fixed size 650x600 with one
        marker trace (and legend entry) per row of ``df``.
    """
    throughput_column = (
        "Throughput (tokens/s)" if bs == 1 else "Throughput (tokens/s) bs=50"
    )

    # Work on a copy so the helper columns below do not leak into the
    # caller's DataFrame.
    plot_df = df.copy()

    # Plotly marker symbol code 2. Plotly's symbol table lists 2 as "diamond";
    # an earlier comment here called it a triangle.
    # NOTE(review): confirm which shape is actually intended.
    plot_df["symbol"] = 2
    plot_df["color"] = ""
    for pattern, color in _MODEL_FAMILY_COLORS:
        plot_df.loc[plot_df["Models"].str.contains(pattern), "color"] = color

    fig = go.Figure()

    for i in plot_df.index:
        # Joined with "<br>"; the empty entry yields the blank line after the
        # bold model name.
        hover_lines = [
            "<b>%{text}</b>",
            "",
            f"{throughput_column}: %{{x}}",
            "Average Score: %{y}",
            "Peak Memory (MB): " + str(plot_df.loc[i, "Peak Memory (MB)"]),
            "Human Eval (Python): " + str(plot_df.loc[i, "humaneval-python"]),
        ]
        fig.add_trace(
            go.Scatter(
                x=[plot_df.loc[i, throughput_column]],
                y=[plot_df.loc[i, "Average score"]],
                mode="markers",
                marker=dict(
                    # Marker size is the "Size (B)" value offset by 10.
                    size=[plot_df.loc[i, "Size (B)"] + 10],
                    color=plot_df.loc[i, "color"],
                    symbol=plot_df.loc[i, "symbol"],
                ),
                name=plot_df.loc[i, "Models"],
                hovertemplate="<br>".join(hover_lines),
                text=[plot_df.loc[i, "Models"]],
                showlegend=True,
            )
        )

    fig.update_layout(
        autosize=False,
        width=650,
        height=600,
        title=f"Average Score Vs Throughput (A100-80GB, Float16, Batch Size <b>{bs}</b>)",
        xaxis_title=throughput_column,
        yaxis_title="Average Code Score",
    )
    return fig