import gradio as gr
import pandas as pd
import os
import shutil

# Description and introduction texts shown above the leaderboard
DESCRIPTION = """
Independent performance benchmark of LLMs with various inference engines. Definitions are below the table.
"""

INTRODUCTION = """
**Introduction**

As part of our ongoing quest to help developers find the right libraries and LLMs for their use cases, we tested five LLMs across six different inference engines (vLLM, TGI, TensorRT-LLM, Triton with vLLM backend, DeepSpeed-MII, CTranslate2) on A100 GPUs hosted on Azure, ensuring a neutral playing field separate from our Inferless platform.

The goal? To help developers, researchers, and AI enthusiasts pinpoint the best LLMs for their needs, whether for development or production.
"""

HOW_WE_TESTED = """
**How we tested**

Here's how we ensured consistent, reliable benchmarks:

* **Platform:** All tests ran on A100 GPUs from Azure, providing a level playing field.
* **Setup:** Docker containers for each library ensured a consistent environment.
* **Configuration:** Standard settings (temperature 0.5, top_p 1) kept the focus on performance, not external variables.
* **Prompts & Token Ranges:** We used six distinct prompts with input lengths from 20 to 2,000 tokens and tested generation lengths of 100, 200, and 500 tokens to evaluate each library's flexibility.
* **Models & Libraries Tested:** We evaluated Phi-3-medium-128k-instruct, Meta-Llama-3.1-8B-Instruct, Mistral-7B-Instruct-v0.3, Qwen2-7B-Instruct, and Gemma-2-9b-it using Text Generation Inference (TGI), vLLM, DeepSpeed-MII, CTranslate2, Triton with vLLM backend, and TensorRT-LLM.
"""

# Path to the folder containing the benchmark result CSV files
csv_folder_path = 'result_csv/'


# Read all CSV files from a folder, combine them, and rearrange columns
def read_and_process_csv_files(folder_path):
    all_data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            file_path = os.path.join(folder_path, filename)
            df = pd.read_csv(file_path)
            all_data.append(df)

    # Columns shown in the leaderboard, in display order
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output"
    ]

    # Guard against an empty folder so pd.concat does not raise
    if not all_data:
        return pd.DataFrame(columns=columns_order)

    combined_df = pd.concat(all_data, ignore_index=True)

    # Ensure all required columns exist; if not, create them with NA values
    for col in columns_order:
        if col not in combined_df.columns:
            combined_df[col] = pd.NA

    # Select and order the columns
    return combined_df[columns_order]


df = read_and_process_csv_files(csv_folder_path)


def get_leaderboard_df():
    return df


def add_new_entry(file):
    global df

    if file is None:
        return df, "No file uploaded."

    # Read the uploaded CSV file
    new_df = pd.read_csv(file.name)

    # Rearrange columns to match the existing DataFrame
    columns_order = [
        "Model_Name", "Library", "TTFT", "Tokens-per-Second", "Token_Count",
        "Input_Tokens", "Output_Tokens", "Input", "Output"
    ]
    for col in columns_order:
        if col not in new_df.columns:
            new_df[col] = pd.NA
    new_df = new_df[columns_order]

    # Append the new data to the existing DataFrame
    df = pd.concat([df, new_df], ignore_index=True)

    # Save the uploaded file to the CSV folder so it persists across restarts
    filename = os.path.basename(file.name)
    destination = os.path.join(csv_folder_path, filename)
    shutil.copy(file.name, destination)

    return df, f"File '{filename}' uploaded and data added successfully!"
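
# Illustrative note (not part of the original script): the CSV layout the app expects.
# A results file dropped into result_csv/ (or uploaded via the "Add New Entry" tab)
# is assumed to hold one benchmark run per row with the columns below; any missing
# column is filled with pd.NA by the functions above. The placeholder values here
# are hypothetical and only show the shape of a row:
#
#   Model_Name,Library,TTFT,Tokens-per-Second,Token_Count,Input_Tokens,Output_Tokens,Input,Output
#   <model>,<engine>,<seconds_to_first_token>,<throughput>,<total_tokens>,<prompt_tokens>,<generated_tokens>,"<prompt>","<completion>"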
with gr.Blocks() as demo:
    gr.Markdown("# LLM Inference Leaderboard")

    # About section at the top
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(DESCRIPTION)
        gr.Markdown(INTRODUCTION)
        gr.Markdown("---")

    # Tabs for Leaderboard and Add New Entry
    with gr.Tabs():
        with gr.TabItem("Leaderboard"):
            leaderboard = gr.DataFrame(df)
        with gr.TabItem("Add New Entry"):
            file_upload = gr.File(label="Upload CSV File")
            submit_button = gr.Button("Add Entry")
            result = gr.Markdown()

    # How we tested section at the bottom
    with gr.Column():
        gr.Markdown("---")
        gr.Markdown(HOW_WE_TESTED)

    submit_button.click(
        add_new_entry,
        inputs=[file_upload],
        outputs=[leaderboard, result]
    )

    demo.load(get_leaderboard_df, outputs=[leaderboard])

demo.launch()
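
# Usage note (an assumption, not part of the original app): if the leaderboard needs
# to be reachable beyond localhost, launch() accepts standard Gradio options, e.g.:
#
#   demo.launch(server_name="0.0.0.0", server_port=7860)  # bind to all interfaces
#   demo.launch(share=True)                                # temporary public share link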