import asyncio
from typing import Any, Dict, List, Tuple

import pandas as pd
import streamlit as st

from .components.filters import render_table_filters
from .components.visualizations import (
    render_leaderboard_table,
    render_performance_plots,
    render_device_rankings,
)
from .components.header import render_header, render_contribution_guide
from .services.firebase import fetch_leaderboard_data
from .core.styles import CUSTOM_CSS
from .core.scoring import (
    calculate_performance_score,
    get_performance_metrics,
    StandardBenchmarkConditions,
)


def get_filter_values(
    df: pd.DataFrame,
) -> Tuple[
    List[str],
    List[str],
    List[str],
    List[str],
    List[str],
    Tuple[int, int],
    Tuple[int, int],
    Tuple[int, int],
    List[str],
    Tuple[int, int],
]:
    """Get the unique values and ranges used to populate the table filters."""
    models = sorted(df["Model ID"].unique().tolist())
    platforms = sorted(df["Platform"].unique().tolist())
    devices = sorted(df["Device"].unique().tolist())
    cache_type_v = sorted(df["cache_type_v"].unique().tolist())
    cache_type_k = sorted(df["cache_type_k"].unique().tolist())
    n_threads = (df["n_threads"].min(), df["n_threads"].max())
    max_n_gpu_layers = (0, max(df["n_gpu_layers"].unique().tolist()))
    pp_range = (df["PP Config"].min(), df["PP Config"].max())
    tg_range = (df["TG Config"].min(), df["TG Config"].max())
    versions = sorted(df["Version"].unique().tolist())
    return (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    )


def render_performance_metrics(metrics: Dict[str, Any]) -> None:
    """Render the headline performance metrics in a five-column grid."""
    st.markdown("### 🏆 Performance Overview")

    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        st.metric("Top Device", metrics["top_device"])
    with col2:
        st.metric("Top Score", f"{metrics['top_score']:.1f}")
    with col3:
        st.metric("Average Score", f"{metrics['avg_score']:.1f}")
    with col4:
        st.metric("Total Devices", metrics["total_devices"])
    with col5:
        st.metric("Total Models", metrics["total_models"])


async def main() -> None:
    """Main application entry point."""
    st.set_page_config(
        page_title="AI Phone Benchmark Leaderboard",
        page_icon="📱",
        layout="wide",
    )

    # Apply custom styles
    st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

    # Fetch initial data
    df = await fetch_leaderboard_data()
    if df.empty:
        st.error("No data available. Please check your connection and try again.")
        return

    # Calculate performance scores
    df = calculate_performance_score(df)
    metrics = get_performance_metrics(df)

    # Render header
    render_header()

    # Get unique values for filters
    (
        models,
        platforms,
        devices,
        cache_type_v,
        cache_type_k,
        pp_range,
        tg_range,
        n_threads,
        versions,
        max_n_gpu_layers,
    ) = get_filter_values(df)

    # Create the main layout, with a side column for the contribution guide
    if "show_guide" not in st.session_state:
        st.session_state.show_guide = True

    main_col, guide_col = st.columns(
        [
            0.9 if not st.session_state.show_guide else 0.8,
            0.1 if not st.session_state.show_guide else 0.2,
        ]
    )

    with main_col:
        # Create tabs for the different views
        tab1, tab2 = st.tabs(["Device Rankings", "Benchmark Results"])

        with tab1:
            # Device rankings view
            st.title("🏆 Device Rankings")

            # Show standardization notice
            std = StandardBenchmarkConditions()
            st.info(
                f"📊 Rankings are based on benchmarks run under standard conditions: "
                f"PP={std.PP_CONFIG} tokens, TG={std.TG_CONFIG} tokens. "
                "Scores factor in model size and quantization."
            )

            # Render performance metrics
            render_performance_metrics(metrics)

            # Render device rankings
            render_device_rankings(df)

        with tab2:
            # Original benchmark view
            table_filters = render_table_filters(
                models,
                platforms,
                devices,
                cache_type_v,
                cache_type_k,
                pp_range,
                tg_range,
                n_threads,
                versions,
                max_n_gpu_layers,
            )

            # Render the main leaderboard table
            render_leaderboard_table(df, table_filters)

            # Render plot section
            st.markdown("---")

            # Render performance plots with the same table filters
            render_performance_plots(df, table_filters)

    with guide_col:
        render_contribution_guide()


if __name__ == "__main__":
    asyncio.run(main())