File size: 4,532 Bytes
19aa2b2
 
ca89999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19aa2b2
 
ca89999
19aa2b2
ca89999
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import gradio as gr
import pandas as pd
import os
import shutil

# Description and Introduction texts
# NOTE: these are runtime string literals rendered as Markdown in the Gradio
# UI below — any wording change here is user-visible.
DESCRIPTION = """
Independent performance benchmark of LLMs with various Inference Engines. Definitions are below the table.
"""

INTRODUCTION = """
**Introduction**
In our ongoing quest to help developers find the right libraries and LLMs for their use cases.

We tested them across six different inference engines (vLLM, TGI, TensorRT-LLM, Tritonvllm, Deepspeed-mii, ctranslate) on A100 GPUs hosted on Azure, ensuring a neutral playing field separate from our Inferless platform.
The goal?
To help developers, researchers, and AI enthusiasts pinpoint the best LLMs for their needs, whether for development or production.
"""

HOW_WE_TESTED = """
**How we tested?**
Here's how we ensured consistent, reliable benchmarks:
* **Platform:** All tests ran on A100 GPUs from Azure, providing a level playing field.
* **Setup:** Docker containers for each library ensured a consistent environment.
* **Configuration:** Standard settings (temperature 0.5, top_p 1) kept the focus on performance, not external variables.
* **Prompts & Token Ranges:** We used six distinct prompts with input lengths from 20 to 2,000 tokens and tested generation lengths of 100, 200, and 500 tokens to evaluate each library's flexibility.
* **Models & Libraries Tested:** We evaluated Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it using Text Generation Inference (TGI), vLLM, DeepSpeed Mii, CTranslate2, Triton with vLLM Backend, and TensorRT-LLM.
"""

# Directory (relative to the working directory) that holds the benchmark
# result CSVs; also the destination for files uploaded via the UI.
csv_folder_path = 'result_csv/'

# Function to read all CSV files from a folder and rearrange columns
def read_and_process_csv_files(folder_path):
    """Load every CSV in *folder_path* into one DataFrame with a fixed schema.

    Args:
        folder_path: Directory scanned (non-recursively) for ``*.csv`` files.

    Returns:
        pandas.DataFrame containing the concatenated rows of all CSVs, with
        exactly the benchmark columns in a fixed order. Columns absent from
        the source files are created and filled with ``pd.NA``. If the folder
        contains no CSV files, an empty DataFrame with the expected columns
        is returned (the original code raised ``ValueError`` from
        ``pd.concat`` on an empty list).
    """
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output"
    ]

    all_data = [
        pd.read_csv(os.path.join(folder_path, filename))
        for filename in os.listdir(folder_path)
        if filename.endswith('.csv')
    ]

    # pd.concat raises on an empty list; return an empty frame with the
    # expected schema so the UI still renders when no results exist yet.
    if not all_data:
        return pd.DataFrame(columns=columns_order)

    combined_df = pd.concat(all_data, ignore_index=True)

    # Ensure all required columns exist; create missing ones with NA values.
    for col in columns_order:
        if col not in combined_df.columns:
            combined_df[col] = pd.NA

    # Select and order the columns.
    return combined_df[columns_order]

df = read_and_process_csv_files(csv_folder_path)

def get_leaderboard_df():
    """Return the current in-memory leaderboard table.

    Used by ``demo.load`` to populate the DataFrame component on page load.
    """
    current = df
    return current

def add_new_entry(file):
    """Merge an uploaded CSV into the leaderboard and archive the file.

    Args:
        file: Gradio file object exposing a ``.name`` filesystem path, or
            ``None`` when nothing was uploaded.

    Returns:
        Tuple of (updated leaderboard DataFrame, status message string)
        matching the ``outputs=[leaderboard, result]`` wiring below.
    """
    global df
    if file is None:
        return df, "No file uploaded."

    # Read the uploaded CSV file.
    new_df = pd.read_csv(file.name)

    # Align the upload to the leaderboard schema; missing columns become NA.
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output"
    ]
    for col in columns_order:
        if col not in new_df.columns:
            new_df[col] = pd.NA
    new_df = new_df[columns_order]

    # Append the new data to the in-memory leaderboard.
    df = pd.concat([df, new_df], ignore_index=True)

    # Persist the upload into the results folder so it is included by the
    # startup scan on the next launch.
    filename = os.path.basename(file.name)
    destination = os.path.join(csv_folder_path, filename)
    shutil.copy(file.name, destination)

    # Bug fix: the message previously contained the literal text "(unknown)"
    # instead of interpolating the uploaded file's name.
    return df, f"File '{filename}' uploaded and data added successfully!"

# Build the Gradio UI: header, about section, two tabs (leaderboard view and
# CSV upload), and a footer describing the test methodology.
with gr.Blocks() as demo:
    gr.Markdown("# LLM Inference Leaderboard")
    
    # About section at the top
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(DESCRIPTION)
        gr.Markdown(INTRODUCTION)
        gr.Markdown("---")
    
    # Tabs for Leaderboard and Add New Entry
    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            # Seeded with the startup DataFrame; refreshed by demo.load and
            # by the upload handler below.
            leaderboard = gr.DataFrame(df)
        
        with gr.TabItem("Add New Entry"):
            file_upload = gr.File(label="Upload CSV File")
            submit_button = gr.Button("Add Entry")
            # Status message area updated by add_new_entry.
            result = gr.Markdown()

    # How we tested section at the bottom
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(HOW_WE_TESTED)

    # Wire the upload button: add_new_entry returns (DataFrame, message).
    submit_button.click(
        add_new_entry,
        inputs=[file_upload],
        outputs=[leaderboard, result]
    )

    # Repopulate the table from the module-level df on every page load.
    demo.load(get_leaderboard_df, outputs=[leaderboard])

demo.launch()