# LLM Inference Leaderboard — Gradio app (Hugging Face Space, commit ca89999, ~4.53 kB)
import gradio as gr
import pandas as pd
import os
import shutil
# Markdown copy shown in the UI. These are rendered verbatim by gr.Markdown,
# so the text is runtime data — edit wording here, not in the layout code.
# Description and Introduction texts
DESCRIPTION = """
Independent performance benchmark of LLMs with various Inference Engines. Definitions are below the table.
"""
INTRODUCTION = """
**Introduction**
In our ongoing quest to help developers find the right libraries and LLMs for their use cases.
We tested them across six different inference engines (vLLM, TGI, TensorRT-LLM, Tritonvllm, Deepspeed-mii, ctranslate) on A100 GPUs hosted on Azure, ensuring a neutral playing field separate from our Inferless platform.
The goal?
To help developers, researchers, and AI enthusiasts pinpoint the best LLMs for their needs, whether for development or production.
"""
HOW_WE_TESTED = """
**How we tested?**
Here's how we ensured consistent, reliable benchmarks:
* **Platform:** All tests ran on A100 GPUs from Azure, providing a level playing field.
* **Setup:** Docker containers for each library ensured a consistent environment.
* **Configuration:** Standard settings (temperature 0.5, top_p 1) kept the focus on performance, not external variables.
* **Prompts & Token Ranges:** We used six distinct prompts with input lengths from 20 to 2,000 tokens and tested generation lengths of 100, 200, and 500 tokens to evaluate each library's flexibility.
* **Models & Libraries Tested:** We evaluated Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it using Text Generation Inference (TGI), vLLM, DeepSpeed Mii, CTranslate2, Triton with vLLM Backend, and TensorRT-LLM.
"""
# Folder scanned at startup for benchmark CSVs; uploads are also copied here
# by add_new_entry() so they survive restarts. Relative to the working directory.
# Replace 'path/to/your/csv/folder' with the actual path to your folder containing CSV files
csv_folder_path = 'result_csv/'
# Function to read all CSV files from a folder and rearrange columns
# Function to read all CSV files from a folder and rearrange columns
def read_and_process_csv_files(folder_path):
    """Read every ``*.csv`` in *folder_path* into one leaderboard DataFrame.

    All files are concatenated row-wise; any column missing from the combined
    data is created filled with ``pd.NA`` so the result always has the full,
    fixed leaderboard schema in a stable order.

    Args:
        folder_path: Directory containing the benchmark CSV files.

    Returns:
        pandas.DataFrame with exactly the leaderboard columns, in order.
        Empty (zero rows) when the folder holds no CSV files — previously
        this case crashed with ``ValueError`` from ``pd.concat([])``.
    """
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output",
    ]
    all_data = []
    # sorted() makes row order deterministic; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            all_data.append(pd.read_csv(file_path))
    if not all_data:
        # No CSVs found: return an empty frame with the expected schema
        # instead of letting pd.concat raise on an empty list.
        return pd.DataFrame(columns=columns_order)
    combined_df = pd.concat(all_data, ignore_index=True)
    # Ensure all required columns exist, if not, create them with NaN values
    for col in columns_order:
        if col not in combined_df.columns:
            combined_df[col] = pd.NA
    # Select and order the columns
    return combined_df[columns_order]
# Module-level leaderboard cache, built once at import time and mutated
# in place by add_new_entry() when a user uploads a new CSV.
df = read_and_process_csv_files(csv_folder_path)
def get_leaderboard_df():
    """Return the current leaderboard DataFrame (wired to demo.load below)."""
    return df
def add_new_entry(file):
    """Append an uploaded CSV to the leaderboard and persist it to disk.

    Args:
        file: Gradio upload object exposing the temp path as ``.name``,
            or ``None`` when nothing was uploaded.

    Returns:
        tuple[pandas.DataFrame, str]: the updated leaderboard and a
        user-facing status message.
    """
    global df
    if file is None:
        return df, "No file uploaded."
    # Read the uploaded CSV file; report parse failures instead of
    # crashing the event handler with an unhandled exception.
    try:
        new_df = pd.read_csv(file.name)
    except Exception as exc:
        return df, f"Could not read CSV file: {exc}"
    # Rearrange columns to match the existing DataFrame
    # (same schema as read_and_process_csv_files).
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output",
    ]
    for col in columns_order:
        if col not in new_df.columns:
            new_df[col] = pd.NA
    new_df = new_df[columns_order]
    # Append the new data to the existing DataFrame
    df = pd.concat([df, new_df], ignore_index=True)
    # Save the uploaded file to the CSV folder so it is picked up again
    # on the next restart by read_and_process_csv_files().
    filename = os.path.basename(file.name)
    destination = os.path.join(csv_folder_path, filename)
    shutil.copy(file.name, destination)
    # BUG FIX: the message previously hard-coded '(unknown)' — it was not an
    # f-string interpolation, so users never saw which file was added.
    return df, f"File '{filename}' uploaded and data added successfully!"
# UI layout: description header, a Leaderboard tab showing the DataFrame,
# and an upload tab that appends rows via add_new_entry().
with gr.Blocks() as demo:
    gr.Markdown("# LLM Inference Leaderboard")
    # About section at the top
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(DESCRIPTION)
        gr.Markdown(INTRODUCTION)
        gr.Markdown("---")
    # Tabs for Leaderboard and Add New Entry
    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            # Seeded with the import-time snapshot; refreshed on page load
            # (demo.load below) and after each upload.
            leaderboard = gr.DataFrame(df)
        with gr.TabItem("Add New Entry"):
            file_upload = gr.File(label="Upload CSV File")
            submit_button = gr.Button("Add Entry")
            result = gr.Markdown()
    # How we tested section at the bottom
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(HOW_WE_TESTED)
    # Upload handler: updates both the table and the status message.
    submit_button.click(
        add_new_entry,
        inputs=[file_upload],
        outputs=[leaderboard, result]
    )
# Re-fetch the (possibly mutated) global df every time the page loads.
demo.load(get_leaderboard_df, outputs=[leaderboard])
demo.launch()