# Hugging Face Space status: Running
import html

import gradio as gr
import pandas as pd
import requests
# Number of model rows shown initially and added by each "Load More" click.
CHUNK_SIZE = 1000

# Backend endpoints: the JSON payload with models/authors, and the marker
# that changes whenever that payload is regenerated.
DATA_URL = "https://erkhov.com/huggingspace_data"
TIME_URL = "https://erkhov.com/huggingspace_time"
def fetch_time():
    """Return the backend's "last updated" marker as a stripped string.

    Sends a GET request to ``TIME_URL`` and returns the response body with
    surrounding whitespace removed. The endpoint is assumed to return a raw
    timestamp or numeric value as plain text.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # requests waits forever by default; a timeout keeps a stalled backend
    # from hanging the app.
    response = requests.get(TIME_URL, timeout=30)
    # Fail loudly on 4xx/5xx rather than treating an error page as a timestamp.
    response.raise_for_status()
    return response.text.strip()
def fetch_data():
    """Download and decode the leaderboard payload from ``DATA_URL``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: if the response body is not valid JSON.
    """
    # requests waits forever by default; bound the wait explicitly.
    response = requests.get(DATA_URL, timeout=60)
    # Surface HTTP errors here instead of as a confusing JSON decode failure.
    response.raise_for_status()
    return response.json()
def clickable(x, which_one):
    """Render a Hugging Face identifier as an HTML link for a markdown cell.

    Args:
        x: Identifier to link to (e.g. a model id or an author name). The
            placeholders "Not Found" and "Unknown", and any non-string value
            (such as None or NaN from missing data), yield "Not Found".
        which_one: "models" links to ``https://huggingface.co/{x}``; any other
            value is used as a path prefix,
            ``https://huggingface.co/{which_one}/{x}``.

    Returns:
        An ``<a>`` tag that opens in a new tab, or the string "Not Found".
    """
    if not isinstance(x, str) or x in ("Not Found", "Unknown"):
        return "Not Found"
    # The value originates from remote data and is interpolated into HTML,
    # so escape it for both the attribute and the link text.
    safe = html.escape(x, quote=True)
    path = safe if which_one == "models" else f"{which_one}/{safe}"
    return (
        f'<a target="_blank" href="https://huggingface.co/{path}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;">'
        f"{safe}</a>"
    )
def create_dataframes(data):
    """Turn the backend payload into display-ready models/authors tables.

    Args:
        data: Mapping with a "models" entry and an "authors" entry, each in a
            form accepted by ``pd.DataFrame``.

    Returns:
        ``(models_df, authors_df)``: columns renamed to their display labels,
        id/author cells converted to HTML links, models sorted by
        "Downloads (30d)" and authors by "Models_Count", both descending.
    """
    model_labels = {
        "id": "Model ID",
        "author": "Author Name",
        "downloads": "Downloads (30d)",
        "likes": "Likes",
        "created_at": "Created At",
        "last_modified": "Last Modified",
    }
    author_labels = {
        "author": "Author Name",
        "models_count": "Models_Count",
        "downloads": "Total_Downloads",
        "likes": "Total_Likes",
    }

    def as_link(value):
        # Models and authors both live directly under huggingface.co/<name>.
        return clickable(value, "models")

    raw_models = data["models"]
    raw_authors = data["authors"]

    # Models table
    models_df = pd.DataFrame(raw_models).rename(columns=model_labels)
    for column in ("Model ID", "Author Name"):
        models_df[column] = models_df[column].apply(as_link)
    models_df = models_df.sort_values(by="Downloads (30d)", ascending=False)

    # Authors table
    authors_df = pd.DataFrame(raw_authors).rename(columns=author_labels)
    authors_df["Author Name"] = authors_df["Author Name"].apply(as_link)
    authors_df = authors_df.sort_values(by="Models_Count", ascending=False)

    return models_df, authors_df
def apply_model_filters(models_df, search_query, min_downloads, min_likes):
    """Return the rows of ``models_df`` that satisfy the given filters.

    Args:
        models_df: Models table with "Model ID" and "Author Name" cells
            holding HTML links, plus "Downloads (30d)" and "Likes" columns.
        search_query: Text to look for, case-insensitively and literally, in
            the visible model id or author name. Empty, whitespace-only, or
            None disables the search.
        min_downloads: Lower bound for "Downloads (30d)"; None or a value
            <= 0 disables it.
        min_likes: Lower bound for "Likes"; None or a value <= 0 disables it.

    Returns:
        A new DataFrame; the input is never modified.
    """
    df = models_df.copy()

    # Search filter
    query = (search_query or "").strip()
    if query:
        # Match against the link text only (what sits between '>' and '<'),
        # not the surrounding HTML markup.
        visible_model_id = df["Model ID"].str.extract(r'>(.*?)<')[0]
        visible_author_name = df["Author Name"].str.extract(r'>(.*?)<')[0]
        # regex=False: user input is plain text, so characters such as
        # '+', '(' or '.' must not be interpreted as a pattern.
        mask = (
            visible_model_id.str.contains(query, case=False, na=False, regex=False)
            | visible_author_name.str.contains(query, case=False, na=False, regex=False)
        )
        df = df[mask]

    # Minimum downloads filter
    if min_downloads is not None and min_downloads > 0:
        df = df[df["Downloads (30d)"] >= min_downloads]

    # Minimum likes filter
    if min_likes is not None and min_likes > 0:
        df = df[df["Likes"] >= min_likes]

    return df
def filter_models(models_df, search_query, min_downloads, min_likes):
    """Filter the models table and restart paging at the first chunk.

    Returns:
        A 3-tuple: the first ``CHUNK_SIZE`` matching rows (for display),
        ``CHUNK_SIZE`` (the new paging offset), and the complete filtered
        table (kept so later pages can be taken from it).
    """
    matches = apply_model_filters(
        models_df, search_query, min_downloads, min_likes
    )
    first_page = matches.iloc[:CHUNK_SIZE]
    return first_page, CHUNK_SIZE, matches
def update_model_table(start_idx, filtered_df):
    """Extend the visible slice of the models table by one chunk.

    Args:
        start_idx: Number of rows currently shown.
        filtered_df: Full filtered models table to page through.

    Returns:
        ``(rows, new_end)``: a copy of the first ``new_end`` rows of
        ``filtered_df`` and the advanced offset ``start_idx + CHUNK_SIZE``.
    """
    next_end = start_idx + CHUNK_SIZE
    # The table is re-rendered from row 0 so earlier pages stay visible.
    visible_rows = filtered_df.head(next_end).copy()
    return visible_rows, next_end
def apply_author_filters(authors_df, search_query, min_author_downloads, min_author_likes):
    """Return the rows of ``authors_df`` that satisfy the given filters.

    Args:
        authors_df: Authors table with "Author Name" cells holding HTML
            links, plus "Total_Downloads" and "Total_Likes" columns.
        search_query: Text to look for, case-insensitively and literally, in
            the visible author name. Empty, whitespace-only, or None disables
            the search.
        min_author_downloads: Lower bound for "Total_Downloads"; None or a
            value <= 0 disables it.
        min_author_likes: Lower bound for "Total_Likes"; None or a value
            <= 0 disables it.

    Returns:
        A new DataFrame; the input is never modified.
    """
    df = authors_df.copy()

    # Search filter for authors
    query = (search_query or "").strip()
    if query:
        # Match against the link text only, not the surrounding HTML markup.
        visible_author_name = df["Author Name"].str.extract(r'>(.*?)<')[0]
        # regex=False: user input is plain text, not a pattern.
        mask = visible_author_name.str.contains(
            query, case=False, na=False, regex=False
        )
        df = df[mask]

    # Minimum total downloads filter
    if min_author_downloads is not None and min_author_downloads > 0:
        df = df[df["Total_Downloads"] >= min_author_downloads]

    # Minimum total likes filter
    if min_author_likes is not None and min_author_likes > 0:
        df = df[df["Total_Likes"] >= min_author_likes]

    return df
def filter_authors(authors_df, author_search_query, min_author_downloads, min_author_likes):
    """Event handler for the authors tab: return the filtered authors table."""
    return apply_author_filters(
        authors_df,
        author_search_query,
        min_author_downloads,
        min_author_likes,
    )
def refresh_data(last_time, models_df_state, authors_df_state, stats_markdown, model_table, author_table):
    """Reload the leaderboard when the backend reports a new update time.

    Args:
        last_time: Update marker seen on the previous check.
        models_df_state: Current full models table.
        authors_df_state: Current full authors table.
        stats_markdown: Current value of the stats header.
        model_table: Current value of the models table component.
        author_table: Current value of the authors table component.

    Returns:
        A 6-tuple in the same order as the arguments. When the marker is
        unchanged, is a placeholder, or the backend cannot be reached, the
        inputs are returned untouched. Otherwise: the new marker, the new
        models and authors tables, and ``gr.update`` objects for the stats
        header, the models table (first ``CHUNK_SIZE`` rows) and the authors
        table.
    """
    unchanged = (
        last_time,
        models_df_state,
        authors_df_state,
        stats_markdown,
        model_table,
        author_table,
    )

    try:
        current_time = fetch_time()
        # fetch_time() returns text, so the "no data" placeholder has to be
        # compared as a string; comparing against the integer 0 never matches.
        # NOTE(review): assumes the backend uses "0"/empty as that placeholder.
        if current_time == last_time or current_time in ("", "0", 0):
            return unchanged
        # Time changed, re-fetch data
        data = fetch_data()
    except (requests.RequestException, ValueError) as err:
        # A transient backend failure should not break the periodic check;
        # keep showing the current data and try again on the next tick.
        print(f"refresh_data: could not refresh from backend: {err}")
        return unchanged

    models_df, authors_df = create_dataframes(data)
    total_models_count = data["total_models"]
    total_downloads = data["total_downloads"]
    total_likes = models_df["Likes"].sum() if "Likes" in models_df.columns else 0

    # Update stats markdown
    new_stats_markdown = f"""
# GGUF Models and Authors Leaderboard
**Total Models:** {total_models_count} | **Total Downloads (30d):** {total_downloads} | **Total Likes:** {total_likes}
**Last Updated:** {current_time}
"""

    # Update states
    return (
        current_time,
        models_df,
        authors_df,
        gr.update(value=new_stats_markdown),
        gr.update(value=models_df.iloc[:CHUNK_SIZE]),
        gr.update(value=authors_df),
    )
# Initial fetch: load the update marker and the full payload once at import
# time so the UI below can be built with data already in place.
initial_time = fetch_time()
data = fetch_data()
all_models_df, authors_df = create_dataframes(data)

# Headline totals shown above the tabs.
total_models_count = data["total_models"]
total_downloads = data["total_downloads"]
if "Likes" in all_models_df.columns:
    total_likes = all_models_df["Likes"].sum()
else:
    total_likes = 0

initial_stats_markdown = f"""
# GGUF Models and Authors Leaderboard
**Total Models:** {total_models_count} | **Total Downloads (30d):** {total_downloads} | **Total Likes:** {total_likes}
**Last Updated:** {initial_time}
"""
# Build the Gradio UI: an intro, a stats header, a "Models" tab with
# filtering and paging, an "Authors" tab with filtering, and a timer that
# polls the backend for new data.
with gr.Blocks() as demo:
    gr.Markdown("""
# 🚀GGUF Tracker🚀
Welcome to 🚀**GGUF Tracker**🚀, a live-updating leaderboard for all things GGUF on 🚀Hugging Face.
Stats refresh every hour on the backend, but this interface checks every 5 minutes for updates.
By the way, I’m 🚀Richard Erkhov, and you can check out more of what I’m working on at my [🌟**github**](https://github.com/RichardErkhov),
[🌟**huggingface**](https://huggingface.co/RichardErkhov) or [🌟**erkhov.com**](https://erkhov.com). Go take a look—I think you’ll like what you find.
""")
    stats_markdown = gr.Markdown(initial_stats_markdown)

    # States for refresh. Created before the tabs because the filter
    # handlers below read the full tables from them: event inputs must be
    # Gradio components, not raw DataFrames.
    last_hf_time_state = gr.State(value=initial_time)
    models_df_state = gr.State(value=all_models_df)
    authors_df_state = gr.State(value=authors_df)

    with gr.Tabs():
        with gr.TabItem("Models"):
            with gr.Row():
                search_query = gr.Textbox(label="Search (by Model ID or Author Name)")
                min_downloads = gr.Number(label="Min Downloads (30d)", value=0)
                min_likes = gr.Number(label="Min Likes", value=0)
                filter_button = gr.Button("Apply Filters")
            model_table = gr.DataFrame(
                value=all_models_df.iloc[:CHUNK_SIZE],
                interactive=False,
                label="GGUF Models (Click column headers to sort)",
                wrap=True,
                datatype=["markdown", "markdown", "number", "number", "str", "str"]
            )
            load_more_button = gr.Button("Load More Models")

            # States for models: how many rows are shown, and the filtered
            # table that "Load More" pages through.
            start_idx = gr.State(value=CHUNK_SIZE)
            filtered_df_state = gr.State(value=all_models_df)

            # Always filter from the full table (models_df_state). Feeding
            # the previous filter result back in would make filters
            # cumulative and impossible to relax.
            filter_button.click(
                fn=filter_models,
                inputs=[models_df_state, search_query, min_downloads, min_likes],
                outputs=[model_table, start_idx, filtered_df_state]
            )
            load_more_button.click(
                fn=update_model_table,
                inputs=[start_idx, filtered_df_state],
                outputs=[model_table, start_idx]
            )

        with gr.TabItem("Authors"):
            with gr.Row():
                author_search_query = gr.Textbox(label="Search by Author Name")
                min_author_downloads = gr.Number(label="Min Total Downloads", value=0)
                min_author_likes = gr.Number(label="Min Total Likes", value=0)
                author_filter_button = gr.Button("Apply Filters")
            author_table = gr.DataFrame(
                value=authors_df,
                interactive=False,
                label="Authors (Click column headers to sort)",
                wrap=True,
                datatype=["markdown", "number", "number", "number"]
            )
            author_filter_button.click(
                fn=filter_authors,
                inputs=[authors_df_state, author_search_query, min_author_downloads, min_author_likes],
                outputs=author_table
            )

    # Timer to check every 5 minutes = 300 seconds. gr.Timer only takes the
    # interval; the callback is attached through its `tick` event.
    # NOTE(review): a refresh replaces the models table with the unfiltered
    # first chunk but does not reset filtered_df_state/start_idx, so paging
    # continues from the pre-refresh filter result until filters are
    # re-applied.
    timer = gr.Timer(300)
    timer.tick(
        fn=refresh_data,
        inputs=[
            last_hf_time_state,
            models_df_state,
            authors_df_state,
            stats_markdown,
            model_table,
            author_table
        ],
        outputs=[
            last_hf_time_state,
            models_df_state,
            authors_df_state,
            stats_markdown,
            model_table,
            author_table
        ]
    )

demo.launch()