Spaces:
Runtime error
Runtime error
Sébastien De Greef
committed on
Commit
·
bde8b55
1
Parent(s):
d6fc6f4
chore: Update Dockerfile to use pip3 for installing requirements
Browse files- ModelsCatalog.ipynb +219 -0
ModelsCatalog.ipynb
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 91,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"https://ollama.com/library\n"
|
13 |
+
]
|
14 |
+
}
|
15 |
+
],
|
16 |
+
"source": [
|
17 |
+
"from bs4 import BeautifulSoup\n",
|
18 |
+
"from requests import get\n",
|
19 |
+
"# download the HTML content\n",
|
20 |
+
"\n",
|
21 |
+
"base_url = 'https://ollama.com'\n",
|
22 |
+
"library_url = f'{base_url}/library'\n",
|
23 |
+
"print(library_url)\n",
|
24 |
+
"html_content = get(library_url).text\n",
|
25 |
+
"\n",
|
26 |
+
"\n",
|
27 |
+
"# Parse the HTML content with BeautifulSoup\n",
|
28 |
+
"soup = BeautifulSoup(html_content, 'html.parser')\n",
|
29 |
+
"\n",
|
30 |
+
"# Extract all the li elements within the ul\n",
|
31 |
+
"li_items = soup.select('ul[role=\"list\"] > li')\n",
|
32 |
+
"\n",
|
33 |
+
"models = []\n",
|
34 |
+
"\n",
|
35 |
+
"# Iterate over the extracted li elements and print them\n",
|
36 |
+
"for li in li_items:\n",
|
37 |
+
" # get first a tag text\n",
|
38 |
+
" sizes = li.div.div.select('span')\n",
|
39 |
+
" sizes = [size.text for size in sizes]\n",
|
40 |
+
"\n",
|
41 |
+
" pulls = li.div.select('p')[1].select('span')\n",
|
42 |
+
" # remove svg tags from pulls\n",
|
43 |
+
" pulls = [pull.text[:-1] for pull in pulls]\n",
|
44 |
+
" pulls = pulls[0].split('\\xa0')[0].strip()\n",
|
45 |
+
"\n",
|
46 |
+
" model = {\n",
|
47 |
+
" \"name\": li.h2.text.strip(),\n",
|
48 |
+
" \"description\": li.p.text.strip(),\n",
|
49 |
+
" \"url\": f\"{base_url}{li.a['href']}\",\n",
|
50 |
+
" \"params\": sizes,\n",
|
51 |
+
" \"pulls\": pulls \n",
|
52 |
+
" }\n",
|
53 |
+
" models.append(model)\n",
|
54 |
+
"import json\n",
|
55 |
+
"with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
|
56 |
+
" file.write(json.dumps(models, indent=4, ensure_ascii=False))"
|
57 |
+
]
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"cell_type": "code",
|
61 |
+
"execution_count": 97,
|
62 |
+
"metadata": {},
|
63 |
+
"outputs": [
|
64 |
+
{
|
65 |
+
"name": "stdout",
|
66 |
+
"output_type": "stream",
|
67 |
+
"text": [
|
68 |
+
"Model: llama3\n",
|
69 |
+
"Model: phi3\n",
|
70 |
+
"Model: wizardlm2\n",
|
71 |
+
"Model: mistral\n",
|
72 |
+
"Model: gemma\n",
|
73 |
+
"Model: mixtral\n",
|
74 |
+
"Model: llama2\n",
|
75 |
+
"Model: codegemma\n",
|
76 |
+
"Model: command-r\n",
|
77 |
+
"Model: command-r-plus\n",
|
78 |
+
"Model: llava\n",
|
79 |
+
"Model: dbrx\n",
|
80 |
+
"Model: codellama\n",
|
81 |
+
"Model: qwen\n",
|
82 |
+
"Model: dolphin-mixtral\n",
|
83 |
+
"Model: llama2-uncensored\n",
|
84 |
+
"Model: deepseek-coder\n",
|
85 |
+
"Model: mistral-openorca\n",
|
86 |
+
"Model: nomic-embed-text\n",
|
87 |
+
"Model: dolphin-mistral\n",
|
88 |
+
"Model: phi\n",
|
89 |
+
"Model: orca-mini\n",
|
90 |
+
"Model: nous-hermes2\n",
|
91 |
+
"Model: zephyr\n",
|
92 |
+
"Model: llama2-chinese\n",
|
93 |
+
"Model: wizard-vicuna-uncensored\n",
|
94 |
+
"Model: starcoder2\n",
|
95 |
+
"Model: vicuna\n",
|
96 |
+
"Model: tinyllama\n",
|
97 |
+
"Model: openhermes\n",
|
98 |
+
"Model: starcoder\n",
|
99 |
+
"Model: openchat\n",
|
100 |
+
"Model: dolphin-llama3\n",
|
101 |
+
"Model: yi\n",
|
102 |
+
"Model: tinydolphin\n",
|
103 |
+
"Model: wizardcoder\n",
|
104 |
+
"Model: stable-code\n",
|
105 |
+
"Model: mxbai-embed-large\n",
|
106 |
+
"Model: neural-chat\n",
|
107 |
+
"Model: phind-codellama\n",
|
108 |
+
"Model: wizard-math\n",
|
109 |
+
"Model: starling-lm\n",
|
110 |
+
"Model: falcon\n",
|
111 |
+
"Model: dolphincoder\n",
|
112 |
+
"Model: nous-hermes\n",
|
113 |
+
"Model: orca2\n",
|
114 |
+
"Model: sqlcoder\n",
|
115 |
+
"Model: stablelm2\n",
|
116 |
+
"Model: dolphin-phi\n",
|
117 |
+
"Model: solar\n",
|
118 |
+
"Model: yarn-llama2\n",
|
119 |
+
"Model: deepseek-llm\n",
|
120 |
+
"Model: codeqwen\n",
|
121 |
+
"Model: bakllava\n",
|
122 |
+
"Model: all-minilm\n",
|
123 |
+
"Model: samantha-mistral\n",
|
124 |
+
"Model: llama3-gradient\n",
|
125 |
+
"Model: medllama2\n",
|
126 |
+
"Model: wizardlm-uncensored\n",
|
127 |
+
"Model: xwinlm\n",
|
128 |
+
"Model: nous-hermes2-mixtral\n",
|
129 |
+
"Model: stable-beluga\n",
|
130 |
+
"Model: wizardlm\n",
|
131 |
+
"Model: codeup\n",
|
132 |
+
"Model: yarn-mistral\n",
|
133 |
+
"Model: everythinglm\n",
|
134 |
+
"Model: meditron\n",
|
135 |
+
"Model: llama-pro\n",
|
136 |
+
"Model: magicoder\n",
|
137 |
+
"Model: stablelm-zephyr\n",
|
138 |
+
"Model: nexusraven\n",
|
139 |
+
"Model: codebooga\n",
|
140 |
+
"Model: mistrallite\n",
|
141 |
+
"Model: llama3-chatqa\n",
|
142 |
+
"Model: wizard-vicuna\n",
|
143 |
+
"Model: snowflake-arctic-embed\n",
|
144 |
+
"Model: llava-llama3\n",
|
145 |
+
"Model: goliath\n",
|
146 |
+
"Model: open-orca-platypus2\n",
|
147 |
+
"Model: moondream\n",
|
148 |
+
"Model: duckdb-nsql\n",
|
149 |
+
"Model: notux\n",
|
150 |
+
"Model: megadolphin\n",
|
151 |
+
"Model: notus\n",
|
152 |
+
"Model: alfred\n",
|
153 |
+
"Model: llava-phi3\n",
|
154 |
+
"Model: falcon2\n"
|
155 |
+
]
|
156 |
+
}
|
157 |
+
],
|
158 |
+
"source": [
|
159 |
+
"for model in models:\n",
|
160 |
+
" tagsurl = f\"{model['url']}/tags\"\n",
|
161 |
+
" tags_page = get(tagsurl).text\n",
|
162 |
+
" # Parse the HTML content with BeautifulSoup\n",
|
163 |
+
" soup = BeautifulSoup(tags_page, 'html.parser')\n",
|
164 |
+
" # select links with the class group\n",
|
165 |
+
" tags = soup.select('a.group')\n",
|
166 |
+
" print(f\"Model: {model['name']}\")\n",
|
167 |
+
" model_tags = []\n",
|
168 |
+
" for tag in tags:\n",
|
169 |
+
" # get the parent div of the tag\n",
|
170 |
+
" parent = tag.parent\n",
|
171 |
+
" sizes = parent.parent.select('div.items-baseline')[0].text.strip().split(' • ',2)\n",
|
172 |
+
" # strip each size\n",
|
173 |
+
" sizes = [size.strip() for size in sizes]\n",
|
174 |
+
" model_tags.append({\n",
|
175 |
+
" \"name\": tag.text.strip(),\n",
|
176 |
+
" \"url\": f\"{base_url}{tag['href']}\",\n",
|
177 |
+
" \"size\": sizes[1],\n",
|
178 |
+
" \"hash\": sizes[0],\n",
|
179 |
+
" \"updated\": sizes[2],\n",
|
180 |
+
" })\n",
|
181 |
+
" link = tag['href']\n",
|
182 |
+
" #print(sizes,\"----\")\n",
|
183 |
+
" # get the next sibling of the parent div\n",
|
184 |
+
" sibling = parent.select('span')\n",
|
185 |
+
" if len(sibling) == 1:\n",
|
186 |
+
" hash = sibling[0].text.strip()\n",
|
187 |
+
" if len(sibling) == 3:\n",
|
188 |
+
" size = sibling[2].strip()\n",
|
189 |
+
" else:\n",
|
190 |
+
" pass\n",
|
191 |
+
" #print(sibling)\n",
|
192 |
+
" model[\"tags\"] = model_tags\n",
|
193 |
+
"with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
|
194 |
+
" file.write(json.dumps(models, indent=4, ensure_ascii=False))"
|
195 |
+
]
|
196 |
+
}
|
197 |
+
],
|
198 |
+
"metadata": {
|
199 |
+
"kernelspec": {
|
200 |
+
"display_name": "base",
|
201 |
+
"language": "python",
|
202 |
+
"name": "python3"
|
203 |
+
},
|
204 |
+
"language_info": {
|
205 |
+
"codemirror_mode": {
|
206 |
+
"name": "ipython",
|
207 |
+
"version": 3
|
208 |
+
},
|
209 |
+
"file_extension": ".py",
|
210 |
+
"mimetype": "text/x-python",
|
211 |
+
"name": "python",
|
212 |
+
"nbconvert_exporter": "python",
|
213 |
+
"pygments_lexer": "ipython3",
|
214 |
+
"version": "3.11.7"
|
215 |
+
}
|
216 |
+
},
|
217 |
+
"nbformat": 4,
|
218 |
+
"nbformat_minor": 2
|
219 |
+
}
|