import base64
import os
import zipfile
from html import escape as _html_escape
from html import unescape as _html_unescape
from typing import Callable, Optional
from urllib.parse import quote as _url_quote

import gradio as gr
import pandas as pd

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@misc{aienergyscore-leaderboard,
    author = {Sasha Luccioni and Boris Gamazaychikov and Emma Strubell and Sara Hooker and Yacine Jernite and Carole-Jean Wu and Margaret Mitchell},
    title = {AI Energy Score Leaderboard - February 2025},
    year = {2025},
    publisher = {Hugging Face},
    howpublished = "\url{https://huggingface.co/spaces/AIEnergyScore/Leaderboard}",
}"""

# List of tasks (CSV filenames)
tasks = [
    'asr.csv',
    'object_detection.csv',
    'text_classification.csv',
    'image_captioning.csv',
    'question_answering.csv',
    'text_generation.csv',
    'image_classification.csv',
    'sentence_similarity.csv',
    'image_generation.csv',
    'summarization.csv',
]

# Directory holding one CSV per task.
_DATA_DIR = os.path.join("data", "energy")

# NOTE(review): the link target was not recoverable from the reviewed text.
# Model ids look like "<provider>/<model>", so the Hugging Face Hub is assumed;
# confirm before deploying.
_MODEL_URL_PREFIX = "https://huggingface.co/"

# Bar colour per energy score. All five scores map to black at present; any
# other value falls back to gray.
_BAR_COLORS = {"1": "black", "2": "black", "3": "black", "4": "black", "5": "black"}

# Dropdown label -> value of the ``class`` column in text_generation.csv.
_TEXT_GENERATION_CLASSES = {
    "A (Single Consumer GPU) <20B parameters": "A",
    "B (Single Cloud GPU) 20-66B parameters": "B",
    "C (Multiple Cloud GPUs) >66B parameters": "C",
}


### HELPER FUNCTIONS ###

def format_stars(score) -> str:
    """Return ``score`` rendered as that many star characters.

    Values that cannot be converted to an integer (``None``, text, NaN,
    infinity) produce an empty string, as do zero and negative scores.
    """
    try:
        star_count = int(score)
    except (TypeError, ValueError, OverflowError):
        star_count = 0
    return "★" * star_count


def make_link(mname) -> str:
    """Return an HTML anchor for a model id of the form ``provider/model``.

    The visible text is the part after the first slash, or the whole id when
    there is no slash. Both the URL and the text are escaped because the ids
    come straight from the CSV files.
    """
    model_id = str(mname)
    parts = model_id.split('/')
    display_name = parts[1] if len(parts) > 1 else model_id
    href = _MODEL_URL_PREFIX + _url_quote(model_id, safe="/")
    return (
        f'<a href="{_html_escape(href)}" target="_blank" rel="noopener noreferrer">'
        f'{_html_escape(display_name)}</a>'
    )


def extract_link_text(html_link: str) -> str:
    """Return the text between the opening tag and the last ``</a>``.

    If the input does not look like an anchor with non-empty text, it is
    returned unchanged.
    """
    start = html_link.find('>') + 1
    end = html_link.rfind('</a>')
    if start > 0 and end > start:
        return html_link[start:end]
    return html_link


def generate_html_table_from_df(df: pd.DataFrame) -> str:
    """Render the leaderboard rows as an HTML table.

    Reads the ``Model``, ``Provider``, ``gpu_energy_numeric``,
    ``energy_score`` and ``Score`` columns. Each energy cell shows the
    formatted number plus a bar whose width is the row's share of the largest
    energy value in ``df``.

    NOTE(review): the inline styles are a reconstruction; the original markup
    was not present in the reviewed text. Confirm against the deployed page.
    """
    # Compute a static width for the Model column based on the longest model name.
    if df.empty:
        max_length = 10
        max_energy = 1
    else:
        max_length = max(
            len(_html_unescape(extract_link_text(str(link)))) for link in df['Model']
        )
        max_energy = df['gpu_energy_numeric'].max()
    static_width = max_length * 10 + 16

    # A zero, negative or NaN maximum would make the bar widths meaningless
    # (division by zero / NaN percentages), so fall back to a unit scale.
    if not max_energy > 0:
        max_energy = 1

    cell_style = 'padding: 8px; text-align: left;'
    pieces = [
        '<table style="width: 100%; border-collapse: collapse; font-family: inherit;">',
        '<thead><tr style="background-color: #f2f2f2;">',
        f'<th style="{cell_style} width: {static_width}px;">Model</th>',
        f'<th style="{cell_style}">Provider</th>',
        f'<th style="{cell_style}">GPU Energy (Wh)</th>',
        f'<th style="{cell_style}">Score</th>',
        '</tr></thead><tbody>',
    ]

    rows = zip(
        df['Model'],
        df['Provider'],
        df['gpu_energy_numeric'],
        df['energy_score'],
        df['Score'],
    )
    for model_link, provider, energy, score, stars in rows:
        energy_str = f"{energy:,.2f}"
        if pd.isna(energy):
            bar_width = 0.0
        else:
            bar_width = min(max(energy / max_energy * 100, 0.0), 100.0)
        bar_color = _BAR_COLORS.get(str(score), "gray")
        pieces.append(
            '<tr style="border-bottom: 1px solid #ddd;">'
            # ``Model`` already holds an escaped anchor built by make_link.
            f'<td style="{cell_style} width: {static_width}px;">{model_link}</td>'
            f'<td style="{cell_style}">{_html_escape(str(provider))}</td>'
            f'<td style="{cell_style}">{energy_str} '
            f'<div style="background-color: {bar_color}; width: {bar_width:.1f}%; '
            f'height: 10px;"></div></td>'
            f'<td style="{cell_style}">{stars}</td>'
            '</tr>'
        )
    pieces.append('</tbody></table>')

    table_markup = "".join(pieces)
    return f'<div style="overflow-x: auto;">{table_markup}</div>'


def _load_task_csv(task: str) -> pd.DataFrame:
    """Read one task CSV and add the ``gpu_energy_numeric`` column.

    A leading ``Unnamed:`` column (a saved index) is dropped.
    ``gpu_energy_numeric`` is ``total_gpu_energy`` multiplied by 1000; the
    table labels it "Wh", so the raw column is presumably kWh (TODO confirm).

    Raises whatever ``pandas.read_csv`` / ``pandas.to_numeric`` raise for a
    missing file or a non-numeric energy value.
    """
    df = pd.read_csv(os.path.join(_DATA_DIR, task))
    if df.columns[0].startswith("Unnamed:"):
        # Copy so the column assignment below never writes into a view.
        df = df.iloc[:, 1:].copy()
    df['gpu_energy_numeric'] = pd.to_numeric(df['total_gpu_energy'], errors='raise') * 1000
    return df


def _add_display_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Add the ``Provider``, ``Model`` and ``Score`` presentation columns in place."""
    df['Provider'] = df['model'].apply(lambda model_id: str(model_id).split('/')[0])
    df['Model'] = df['model'].apply(make_link)
    df['Score'] = df['energy_score'].apply(format_stars)
    return df


def _is_ascending(sort_order: str) -> bool:
    """Translate the UI sort label into the ``ascending`` flag for sorting."""
    return sort_order == "Low to High"


def process_df(task, sort_order="Low to High",
               filter_fn: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None):
    """Load one task CSV, optionally filter it, and return display-ready rows.

    Args:
        task: CSV filename inside ``data/energy``.
        sort_order: ``"Low to High"`` sorts by energy ascending; any other
            value sorts descending.
        filter_fn: optional callable applied to the frame after the numeric
            columns exist and before the presentation columns are added.

    Returns:
        The frame sorted by ``gpu_energy_numeric`` with ``Provider``,
        ``Model`` and ``Score`` columns added.
    """
    df = _load_task_csv(task)
    df['energy_score'] = df['energy_score'].astype(int)
    if filter_fn is not None:
        # Copy: a boolean-mask selection may be a view of the unfiltered frame.
        df = filter_fn(df).copy()
    df = _add_display_columns(df)
    return df.sort_values(by='gpu_energy_numeric', ascending=_is_ascending(sort_order))


def compute_efficiency_ratio(df):
    """Return max/min of ``gpu_energy_numeric``, or 1 when it is undefined.

    The ratio is undefined for an empty frame or a minimum that is not
    strictly positive.
    """
    if df.empty:
        return 1
    min_val = df['gpu_energy_numeric'].min()
    max_val = df['gpu_energy_numeric'].max()
    if min_val > 0:
        return max_val / min_val
    return 1


def generate_info_callout(ratio, scope_text):
    """
    Returns a "did you know" callout with a lightbulb emoji.
    The callout uses a light green background, a small font, and is limited
    to a max-width of 250px. It is wrapped in a container that aligns it to
    the right.

    NOTE(review): exact colours and padding are a reconstruction that follows
    the description above; confirm against the deployed page.
    """
    return (
        '<div style="text-align: right;">'
        '<div style="display: inline-block; max-width: 250px; font-size: 0.8em; '
        'background-color: #e6ffe6; padding: 8px; border-radius: 5px; text-align: left;">'
        f'💡 There\'s a {ratio:,.1f}x difference between the highest and lowest energy use in {scope_text}.'
        '</div>'
        '</div>'
    )


def _render_callout_and_table(df: pd.DataFrame, scope_text: str):
    """Return the ``(callout_html, table_html)`` pair for a prepared frame."""
    ratio = compute_efficiency_ratio(df)
    return generate_info_callout(ratio, scope_text), generate_html_table_from_df(df)


def get_global_callout():
    """Return the callout comparing the extremes across every task CSV."""
    frames = [_load_task_csv(task) for task in tasks]
    # Concatenate once; pd.concat rejects an empty list.
    all_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    ratio = compute_efficiency_ratio(all_df)
    return generate_info_callout(ratio, "this leaderboard")


### ZIP DOWNLOAD FUNCTIONS ###

def zip_csv_files():
    """Write every CSV in the data directory to ``data.zip`` and return its name.

    The archive is created in the current working directory and overwritten
    on each call. Files are added in sorted order so the archive layout is
    deterministic.
    """
    zip_filename = "data.zip"
    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
        for filename in sorted(os.listdir(_DATA_DIR)):
            if filename.endswith(".csv"):
                zipf.write(os.path.join(_DATA_DIR, filename), arcname=filename)
    return zip_filename


def get_zip_data_link():
    """Return an anchor that downloads the zipped CSVs via a base64 data URI.

    NOTE(review): the anchor attributes are a reconstruction; only the link
    text and the base64 payload were present in the reviewed text.
    """
    zip_filename = zip_csv_files()
    with open(zip_filename, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return (
        f'<a href="data:application/zip;base64,{b64}" '
        f'download="{zip_filename}">Download Data</a>'
    )


### UPDATE FUNCTIONS (RETURNING CALLOUT AND TABLE HTML) ###

def _update_task(task: str, sort_order: str, scope_text: str = "this task",
                 filter_fn: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None):
    """Shared body of the per-task update callbacks."""
    df = process_df(task, sort_order, filter_fn)
    return _render_callout_and_table(df, scope_text)


def update_text_generation(selected_display, sort_order):
    """Return callout and table for the selected text-generation model class.

    Unknown dropdown labels fall back to class ``"A"``. If the CSV has no
    ``class`` column, no filtering is applied.
    """
    model_class = _TEXT_GENERATION_CLASSES.get(selected_display, "A")

    def filter_fn(df):
        if 'class' in df.columns:
            return df[df['class'] == model_class]
        return df

    # For Text Generation, use "this class" as the scope.
    return _update_task('text_generation.csv', sort_order, "this class", filter_fn)


def update_image_generation(sort_order):
    """Return callout and table HTML for the image generation task."""
    return _update_task('image_generation.csv', sort_order)


def update_text_classification(sort_order):
    """Return callout and table HTML for the text classification task."""
    return _update_task('text_classification.csv', sort_order)


def update_image_classification(sort_order):
    """Return callout and table HTML for the image classification task."""
    return _update_task('image_classification.csv', sort_order)


def update_image_captioning(sort_order):
    """Return callout and table HTML for the image captioning task."""
    return _update_task('image_captioning.csv', sort_order)


def update_summarization(sort_order):
    """Return callout and table HTML for the summarization task."""
    return _update_task('summarization.csv', sort_order)


def update_asr(sort_order):
    """Return callout and table HTML for the ASR task."""
    return _update_task('asr.csv', sort_order)


def update_object_detection(sort_order):
    """Return callout and table HTML for the object detection task."""
    return _update_task('object_detection.csv', sort_order)


def update_sentence_similarity(sort_order):
    """Return callout and table HTML for the sentence similarity task."""
    return _update_task('sentence_similarity.csv', sort_order)


def update_extractive_qa(sort_order):
    """Return callout and table HTML for the question answering task."""
    return _update_task('question_answering.csv', sort_order)


def update_all_tasks(sort_order):
    """Return callout and table HTML covering every task at once.

    Rows are de-duplicated on the ``model`` column, keeping the first
    occurrence in ``tasks`` order, before sorting by energy.
    """
    frames = []
    for task in tasks:
        df = _load_task_csv(task)
        df['energy_score'] = df['energy_score'].astype(int)
        frames.append(_add_display_columns(df))
    # Concatenate once instead of growing a frame inside the loop.
    all_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    all_df = all_df.drop_duplicates(subset=['model'])
    all_df = all_df.sort_values(by='gpu_energy_numeric', ascending=_is_ascending(sort_order))
    return _render_callout_and_table(all_df, "this leaderboard")


### GLOBAL HEADER (Logo & Global Callout) ###
# Use a