Optimization
Browse files
Instead of using the Hugging Face Hub API to search by author on the server, we now search by author locally after fetching all models/datasets/spaces. This change makes the code considerably faster.
app.py
CHANGED
@@ -9,25 +9,6 @@ import gradio as gr
|
|
9 |
|
10 |
api = HfApi()
|
11 |
|
12 |
-
def get_models(org_name, which_one):
|
13 |
-
all_list = []
|
14 |
-
if which_one == "models":
|
15 |
-
things = api.list_models(author=org_name)
|
16 |
-
elif which_one == "datasets":
|
17 |
-
things = api.list_datasets(author=org_name)
|
18 |
-
elif which_one == "spaces":
|
19 |
-
things = api.list_spaces(author=org_name)
|
20 |
-
|
21 |
-
for i in things:
|
22 |
-
i = i.__dict__
|
23 |
-
json_format_data = {"id": i['id'], "downloads": i['downloads'], "likes": i['likes']} if which_one != "spaces" else {"id": i['id'], "downloads": 0, "likes": i['likes']}
|
24 |
-
|
25 |
-
all_list.append(json_format_data)
|
26 |
-
|
27 |
-
|
28 |
-
df_all_list = (pd.DataFrame(all_list))
|
29 |
-
|
30 |
-
return df_all_list
|
31 |
|
32 |
def get_most(df_for_most_function):
|
33 |
download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
|
@@ -63,21 +44,60 @@ def get_openllm_leaderboard():
|
|
63 |
except (IndexError, AttributeError):
|
64 |
return result_list
|
65 |
|
|
|
66 |
def get_ranking(model_list, target_org):
|
67 |
for index, model in enumerate(model_list):
|
68 |
if model.split("/")[0].lower() == target_org.lower():
|
69 |
return [index+1, model]
|
70 |
return "Not Found"
|
71 |
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
data_rows = []
|
74 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
75 |
|
76 |
trend = get_trending_list(1, which_one)
|
77 |
|
78 |
-
for org in tqdm(orgs, desc=f"
|
79 |
rank = get_ranking_trend(trend, org)
|
80 |
-
|
|
|
|
|
81 |
if len(df) == 0:
|
82 |
continue
|
83 |
num_things = len(df)
|
@@ -140,34 +160,6 @@ def make_leaderboard(orgs, which_one):
|
|
140 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
141 |
return leaderboard
|
142 |
|
143 |
-
"""# Gradio başlasın
|
144 |
-
"""
|
145 |
-
|
146 |
-
with open("org_names.txt", "r") as f:
|
147 |
-
org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
|
148 |
-
|
149 |
-
|
150 |
-
INTRODUCTION_TEXT = f"""
|
151 |
-
🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
152 |
-
|
153 |
-
## Available Dataframes:
|
154 |
-
|
155 |
-
- 🏛️ Models
|
156 |
-
|
157 |
-
- 📊 Datasets
|
158 |
-
|
159 |
-
- 🚀 Spaces
|
160 |
-
|
161 |
-
## Backend
|
162 |
-
|
163 |
-
🛠️ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).
|
164 |
-
|
165 |
-
🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
|
166 |
-
|
167 |
-
**🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
|
168 |
-
|
169 |
-
**🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
|
170 |
-
"""
|
171 |
|
172 |
def clickable(x, which_one):
|
173 |
if which_one == "models":
|
@@ -216,16 +208,44 @@ def get_ranking_trend(json_data, org_name):
|
|
216 |
else:
|
217 |
return {"id": "Not Found", "rank": "Not Found"}
|
218 |
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
with gr.Blocks() as demo:
|
222 |
gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
|
223 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
224 |
|
225 |
-
|
|
|
|
|
|
|
226 |
|
|
|
227 |
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
|
228 |
-
models_df = make_leaderboard(org_names_in_list, "models")
|
229 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
230 |
|
231 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model", "👑 Best Rank at Trending Models"]
|
@@ -233,7 +253,7 @@ with gr.Blocks() as demo:
|
|
233 |
|
234 |
with gr.TabItem("📊 Datasets", id=2):
|
235 |
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
|
236 |
-
dataset_df = make_leaderboard(org_names_in_list, "datasets")
|
237 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
238 |
|
239 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset", "👑 Best Rank at Trending Datasets"]
|
@@ -242,11 +262,10 @@ with gr.Blocks() as demo:
|
|
242 |
with gr.TabItem("🚀 Spaces", id=3):
|
243 |
columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
|
244 |
|
245 |
-
spaces_df = make_leaderboard(org_names_in_list, "spaces")
|
246 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
247 |
|
248 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces"]
|
249 |
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
250 |
|
251 |
-
demo.launch()
|
252 |
-
|
|
|
9 |
|
10 |
api = HfApi()
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
def get_most(df_for_most_function):
|
14 |
download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
|
|
|
44 |
except (IndexError, AttributeError):
|
45 |
return result_list
|
46 |
|
47 |
+
|
48 |
def get_ranking(model_list, target_org):
    """Find the first entry of ``model_list`` owned by ``target_org``.

    Each entry is a string; the text before its first "/" is compared
    case-insensitively with ``target_org``.

    Returns:
        ``[rank, entry]`` for the first match, where ``rank`` is the 1-based
        position in ``model_list``; the string ``"Not Found"`` when nothing
        matches.
    """
    for position, model_id in enumerate(model_list, start=1):
        owner = model_id.partition("/")[0]
        if owner.lower() == target_org.lower():
            return [position, model_id]
    return "Not Found"
|
53 |
|
54 |
+
|
55 |
+
def get_models(which_one):
    """List every repository of one kind from the Hub as plain dicts.

    Args:
        which_one: ``"models"``, ``"datasets"`` or ``"spaces"``; selects
            which ``api.list_*`` call is made on the module-level ``api``.

    Returns:
        A list of dicts with the keys ``"author"``, ``"id"``, ``"downloads"``
        and ``"likes"``. ``"author"`` is the part of the id before the first
        "/". Entries whose id has no "/" are skipped. For spaces,
        ``"downloads"`` is always 0.

    Raises:
        ValueError: if ``which_one`` is not one of the supported kinds.
    """
    # Map the kind to the HfApi method name; resolved lazily so an invalid
    # kind is rejected before the API client is touched.
    listers = {
        "models": "list_models",
        "datasets": "list_datasets",
        "spaces": "list_spaces",
    }
    if which_one not in listers:
        raise ValueError(
            f"which_one must be one of {sorted(listers)}, got {which_one!r}"
        )
    data = getattr(api, listers[which_one])()

    all_list = []
    for item in tqdm(data, desc=f"Scraping {which_one}", position=0, leave=True):
        # NOTE(review): assumes each listed item exposes 'id', 'likes' and
        # (for models/datasets) 'downloads' in its __dict__ -- confirm
        # against the installed huggingface_hub version.
        info = item.__dict__

        repo_id = info["id"]
        author, separator, _ = repo_id.partition("/")
        if not separator:
            # No "author/" prefix, so the entry cannot be grouped by author.
            continue

        # The spaces branch never reads a download count; it reports 0.
        downloads = 0 if which_one == "spaces" else info['downloads']
        all_list.append({
            "author": author,
            "id": repo_id,
            "downloads": downloads,
            "likes": info['likes'],
        })
    return all_list
|
74 |
+
|
75 |
+
|
76 |
+
def search(models_dict, author_name):
    """Return the entries stored under ``author_name`` as a DataFrame.

    Args:
        models_dict: mapping from author name to a list of row dicts.
        author_name: key to look up.

    Returns:
        A ``pandas.DataFrame`` built from the author's rows, or an empty
        DataFrame when the author is not a key of ``models_dict``.
    """
    rows = models_dict.get(author_name, [])
    return pd.DataFrame(rows)
|
78 |
+
|
79 |
+
|
80 |
+
def group_models_by_author(all_things):
    """Bucket entries by their ``'author'`` key.

    Args:
        all_things: iterable of dicts, each with an ``'author'`` key.

    Returns:
        A dict mapping each author to the list of its entries, in the
        order they were encountered.
    """
    grouped = {}
    for entry in all_things:
        grouped.setdefault(entry['author'], []).append(entry)
    return grouped
|
88 |
+
|
89 |
+
|
90 |
+
def make_leaderboard(orgs, which_one, data):
|
91 |
data_rows = []
|
92 |
open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
|
93 |
|
94 |
trend = get_trending_list(1, which_one)
|
95 |
|
96 |
+
for org in tqdm(orgs, desc=f"Proccesing Organizations ({which_one})", position=0, leave=True):
|
97 |
rank = get_ranking_trend(trend, org)
|
98 |
+
|
99 |
+
df = search(data, org)
|
100 |
+
|
101 |
if len(df) == 0:
|
102 |
continue
|
103 |
num_things = len(df)
|
|
|
160 |
leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
|
161 |
return leaderboard
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
def clickable(x, which_one):
|
165 |
if which_one == "models":
|
|
|
208 |
else:
|
209 |
return {"id": "Not Found", "rank": "Not Found"}
|
210 |
|
211 |
+
# Load the organization names, one per line of org_names.txt, with the
# trailing newline removed from each.
with open("org_names.txt", "r") as f:
    org_names_in_list = [line.rstrip("\n") for line in f]


# Markdown introduction rendered at the top of the Gradio page.
INTRODUCTION_TEXT = f"""
🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).

## Available Dataframes:

- 🏛️ Models

- 📊 Datasets

- 🚀 Spaces

## Backend

🛠️ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).

🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).

**🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.

**🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
"""
|
236 |
|
237 |
with gr.Blocks() as demo:
|
238 |
gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
|
239 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
240 |
|
241 |
+
all_models = get_models("models")
|
242 |
+
all_datasets = get_models("datasets")
|
243 |
+
all_spaces = get_models("spaces")
|
244 |
+
|
245 |
|
246 |
+
with gr.TabItem("🏛️ Models", id=1):
|
247 |
columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
|
248 |
+
models_df = make_leaderboard(org_names_in_list, "models", group_models_by_author(all_models))
|
249 |
models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
|
250 |
|
251 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model", "👑 Best Rank at Trending Models"]
|
|
|
253 |
|
254 |
with gr.TabItem("📊 Datasets", id=2):
|
255 |
columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
|
256 |
+
dataset_df = make_leaderboard(org_names_in_list, "datasets", group_models_by_author(all_datasets))
|
257 |
dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
|
258 |
|
259 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset", "👑 Best Rank at Trending Datasets"]
|
|
|
262 |
with gr.TabItem("🚀 Spaces", id=3):
|
263 |
columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
|
264 |
|
265 |
+
spaces_df = make_leaderboard(org_names_in_list, "spaces", group_models_by_author(all_spaces))
|
266 |
spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
|
267 |
|
268 |
headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces"]
|
269 |
gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
|
270 |
|
271 |
+
demo.launch()
|
|