Weyaxi committed on
Commit
ea78272
1 Parent(s): d27a320

Optimization

Browse files

Instead of using the Hugging Face Hub API to search by author on the server, we now fetch all models/datasets/spaces once and then search by author locally. With this change, the code is much better optimized.

Files changed (1) hide show
  1. app.py +76 -57
app.py CHANGED
@@ -9,25 +9,6 @@ import gradio as gr
9
 
10
  api = HfApi()
11
 
12
- def get_models(org_name, which_one):
13
- all_list = []
14
- if which_one == "models":
15
- things = api.list_models(author=org_name)
16
- elif which_one == "datasets":
17
- things = api.list_datasets(author=org_name)
18
- elif which_one == "spaces":
19
- things = api.list_spaces(author=org_name)
20
-
21
- for i in things:
22
- i = i.__dict__
23
- json_format_data = {"id": i['id'], "downloads": i['downloads'], "likes": i['likes']} if which_one != "spaces" else {"id": i['id'], "downloads": 0, "likes": i['likes']}
24
-
25
- all_list.append(json_format_data)
26
-
27
-
28
- df_all_list = (pd.DataFrame(all_list))
29
-
30
- return df_all_list
31
 
32
  def get_most(df_for_most_function):
33
  download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
@@ -63,21 +44,60 @@ def get_openllm_leaderboard():
63
  except (IndexError, AttributeError):
64
  return result_list
65
 
 
66
  def get_ranking(model_list, target_org):
67
  for index, model in enumerate(model_list):
68
  if model.split("/")[0].lower() == target_org.lower():
69
  return [index+1, model]
70
  return "Not Found"
71
 
72
- def make_leaderboard(orgs, which_one):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  data_rows = []
74
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
75
 
76
  trend = get_trending_list(1, which_one)
77
 
78
- for org in tqdm(orgs, desc=f"Scraping Organizations ({which_one})", position=0, leave=True):
79
  rank = get_ranking_trend(trend, org)
80
- df = get_models(org, which_one)
 
 
81
  if len(df) == 0:
82
  continue
83
  num_things = len(df)
@@ -140,34 +160,6 @@ def make_leaderboard(orgs, which_one):
140
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
141
  return leaderboard
142
 
143
- """# Gradio başlasın
144
- """
145
-
146
- with open("org_names.txt", "r") as f:
147
- org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
148
-
149
-
150
- INTRODUCTION_TEXT = f"""
151
- 🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
152
-
153
- ## Available Dataframes:
154
-
155
- - 🏛️ Models
156
-
157
- - 📊 Datasets
158
-
159
- - 🚀 Spaces
160
-
161
- ## Backend
162
-
163
- 🛠️ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).
164
-
165
- 🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
166
-
167
- **🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
168
-
169
- **🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
170
- """
171
 
172
  def clickable(x, which_one):
173
  if which_one == "models":
@@ -216,16 +208,44 @@ def get_ranking_trend(json_data, org_name):
216
  else:
217
  return {"id": "Not Found", "rank": "Not Found"}
218
 
219
- demo = gr.Blocks()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  with gr.Blocks() as demo:
222
  gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
223
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
224
 
225
- with gr.TabItem("🏛️ Models", id=1):
 
 
 
226
 
 
227
  columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
228
- models_df = make_leaderboard(org_names_in_list, "models")
229
  models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
230
 
231
  headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model", "👑 Best Rank at Trending Models"]
@@ -233,7 +253,7 @@ with gr.Blocks() as demo:
233
 
234
  with gr.TabItem("📊 Datasets", id=2):
235
  columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
236
- dataset_df = make_leaderboard(org_names_in_list, "datasets")
237
  dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
238
 
239
  headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset", "👑 Best Rank at Trending Datasets"]
@@ -242,11 +262,10 @@ with gr.Blocks() as demo:
242
  with gr.TabItem("🚀 Spaces", id=3):
243
  columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
244
 
245
- spaces_df = make_leaderboard(org_names_in_list, "spaces")
246
  spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
247
 
248
  headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces"]
249
  gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
250
 
251
- demo.launch()
252
-
 
9
 
10
  api = HfApi()
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def get_most(df_for_most_function):
14
  download_sorted_df = df_for_most_function.sort_values(by=['downloads'], ascending=False)
 
44
  except (IndexError, AttributeError):
45
  return result_list
46
 
47
+
48
  def get_ranking(model_list, target_org):
49
  for index, model in enumerate(model_list):
50
  if model.split("/")[0].lower() == target_org.lower():
51
  return [index+1, model]
52
  return "Not Found"
53
 
54
+
55
def get_models(which_one):
    """List every repo of one kind via the module-level ``api`` as flat records.

    Args:
        which_one: "models", "datasets" or "spaces"; selects which
            ``api.list_*`` call is made.

    Returns:
        A list of dicts with the keys "author", "id", "downloads" and
        "likes". "author" is the part of the repo id before the first "/".
        Repos whose id contains no "/" are skipped. Spaces always get a
        "downloads" value of 0.

    Raises:
        ValueError: if which_one is not one of the three supported kinds.
    """
    listers = {
        "models": api.list_models,
        "datasets": api.list_datasets,
        "spaces": api.list_spaces,
    }
    if which_one not in listers:
        # Fail with a clear message instead of an unbound-local error later.
        raise ValueError(
            f"which_one must be one of {sorted(listers)}, got {which_one!r}"
        )
    data = listers[which_one]()

    all_list = []
    for item in tqdm(data, desc=f"Scraping {which_one}", position=0, leave=True):
        info = item.__dict__

        repo_id = info["id"]
        author, separator, _ = repo_id.partition("/")
        if not separator:
            # No "owner/" prefix, so the repo cannot be attributed to an author.
            continue

        # Spaces are always recorded with zero downloads.
        downloads = 0 if which_one == "spaces" else info["downloads"]
        all_list.append(
            {
                "author": author,
                "id": repo_id,
                "downloads": downloads,
                "likes": info["likes"],
            }
        )
    return all_list
74
+
75
+
76
def search(models_dict, author_name):
    """Return the records stored under author_name as a pandas DataFrame.

    models_dict maps an author name to a list of record dicts. When the
    author is not a key of models_dict, an empty DataFrame is returned.
    """
    if author_name in models_dict:
        records = models_dict[author_name]
    else:
        records = []
    return pd.DataFrame(records)
78
+
79
+
80
def group_models_by_author(all_things):
    """Bucket records by the value of their "author" key.

    Returns a plain dict mapping each author to the list of that author's
    records, in the order they appear in all_things.
    """
    grouped = {}
    for record in all_things:
        grouped.setdefault(record['author'], []).append(record)
    return grouped
88
+
89
+
90
+ def make_leaderboard(orgs, which_one, data):
91
  data_rows = []
92
  open_llm_leaderboard = get_openllm_leaderboard() if which_one == "models" else None
93
 
94
  trend = get_trending_list(1, which_one)
95
 
96
+ for org in tqdm(orgs, desc=f"Proccesing Organizations ({which_one})", position=0, leave=True):
97
  rank = get_ranking_trend(trend, org)
98
+
99
+ df = search(data, org)
100
+
101
  if len(df) == 0:
102
  continue
103
  num_things = len(df)
 
160
  leaderboard.insert(0, "Serial Number", range(1, len(leaderboard) + 1))
161
  return leaderboard
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  def clickable(x, which_one):
165
  if which_one == "models":
 
208
  else:
209
  return {"id": "Not Found", "rank": "Not Found"}
210
 
211
+ with open("org_names.txt", "r") as f:
212
+ org_names_in_list = [i.rstrip("\n") for i in f.readlines()]
213
+
214
+
215
+ INTRODUCTION_TEXT = f"""
216
+ 🎯 The Organization Leaderboard aims to track organization rankings. This space is inspired by the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
217
+
218
+ ## Available Dataframes:
219
+
220
+ - 🏛️ Models
221
+
222
+ - 📊 Datasets
223
+
224
+ - 🚀 Spaces
225
+
226
+ ## Backend
227
+
228
+ 🛠️ The leaderboard's backend mainly runs on the [Hugging Face Hub API](https://huggingface.co/docs/huggingface_hub/v0.5.1/en/package_reference/hf_api).
229
+
230
+ 🛠️ Organization names are retrieved using web scraping from [Huggingface Organizations](https://huggingface.co/organizations).
231
+
232
+ **🌐 Note:** In the model's dataframe, there are some columns related to the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard). This data is also retrieved through web scraping.
233
+
234
+ **🌐 Note:** In trending models, first 300 models/datasets/spaces is being retrieved from huggingface.
235
+ """
236
 
237
  with gr.Blocks() as demo:
238
  gr.Markdown("""<h1 align="center" id="space-title">🤗 Organization Leaderboard</h1>""")
239
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
240
 
241
+ all_models = get_models("models")
242
+ all_datasets = get_models("datasets")
243
+ all_spaces = get_models("spaces")
244
+
245
 
246
+ with gr.TabItem("🏛️ Models", id=1):
247
  columns_to_convert = ["Organization Name", "Best Model On Open LLM Leaderboard", "Most Downloaded Model", "Most Liked Model", "Trending Model"]
248
+ models_df = make_leaderboard(org_names_in_list, "models", group_models_by_author(all_models))
249
  models_df = models_df_to_clickable(models_df, columns_to_convert, "models")
250
 
251
  headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "🤖 Number of Models", "🏆 Best Model On Open LLM Leaderboard", "🥇 Best Rank On Open LLM Leaderboard", "📊 Average Downloads per Model", "📈 Average Likes per Model", "🚀 Most Downloaded Model", "📈 Most Download Count", "❤️ Most Liked Model", "👍 Most Like Count", "🔥 Trending Model", "👑 Best Rank at Trending Models"]
 
253
 
254
  with gr.TabItem("📊 Datasets", id=2):
255
  columns_to_convert = ["Organization Name", "Most Downloaded Dataset", "Most Liked Dataset", "Trending Dataset"]
256
+ dataset_df = make_leaderboard(org_names_in_list, "datasets", group_models_by_author(all_datasets))
257
  dataset_df = models_df_to_clickable(dataset_df, columns_to_convert, "datasets")
258
 
259
  headers = ["🔢 Serial Number", "🏢 Organization Name", "📥 Total Downloads", "👍 Total Likes", "📊 Number of Datasets", "📊 Average Downloads per Dataset", "📈 Average Likes per Dataset", "🚀 Most Downloaded Dataset", "📈 Most Download Count", "❤️ Most Liked Dataset", "👍 Most Like Count", "🔥 Trending Dataset", "👑 Best Rank at Trending Datasets"]
 
262
  with gr.TabItem("🚀 Spaces", id=3):
263
  columns_to_convert = ["Organization Name", "Most Liked Space", "Trending Space"]
264
 
265
+ spaces_df = make_leaderboard(org_names_in_list, "spaces", group_models_by_author(all_spaces))
266
  spaces_df = models_df_to_clickable(spaces_df, columns_to_convert, "spaces")
267
 
268
  headers = ["🔢 Serial Number", "🏢 Organization Name", "👍 Total Likes", "🚀 Number of Spaces", "📈 Average Likes per Space", "❤️ Most Liked Space", "👍 Most Like Count", "🔥 Trending Space", "👑 Best Rank at Trending Spaces"]
269
  gr.Dataframe(spaces_df.head(150), headers=headers, interactive=False, datatype=["str", "markdown", "str", "str", "str", "markdown", "str", "markdown", "str"])
270
 
271
+ demo.launch()