Spaces:

sebdg
/

discord-bot

Runtime error

App Files Files Community

Sébastien De Greef committed on May 19, 2024

Commit

bde8b55

1 Parent(s): d6fc6f4

chore: Update Dockerfile to use pip3 for installing requirements

Browse files

Files changed (1) hide show

ModelsCatalog.ipynb +219 -0

ModelsCatalog.ipynb ADDED Viewed

	@@ -0,0 +1,219 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "https://ollama.com/library\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Scrape the Ollama library page and save one summary record per model to models.json.\n",
+    "import json\n",
+    "\n",
+    "from bs4 import BeautifulSoup\n",
+    "from requests import get\n",
+    "\n",
+    "base_url = 'https://ollama.com'\n",
+    "library_url = f'{base_url}/library'\n",
+    "print(library_url)\n",
+    "\n",
+    "# Bound the wait with a timeout and stop on an HTTP error: otherwise an error page\n",
+    "# would be parsed as if it were the catalog and whatever it yields (possibly an\n",
+    "# empty list) would overwrite models.json.\n",
+    "response = get(library_url, timeout=30)\n",
+    "response.raise_for_status()\n",
+    "html_content = response.text\n",
+    "\n",
+    "# Parse the HTML content with BeautifulSoup\n",
+    "soup = BeautifulSoup(html_content, 'html.parser')\n",
+    "\n",
+    "# Select the <li> elements directly under <ul role=\"list\">; each becomes one record\n",
+    "li_items = soup.select('ul[role=\"list\"] > li')\n",
+    "\n",
+    "models = []\n",
+    "\n",
+    "for li in li_items:\n",
+    "    # Text of every <span> in the first nested <div>, stored under \"params\"\n",
+    "    sizes = [span.text for span in li.div.div.select('span')]\n",
+    "\n",
+    "    # First <span> of the second <p> in the first <div>: drop its last character,\n",
+    "    # then keep the part before the first non-breaking space\n",
+    "    pull_spans = li.div.select('p')[1].select('span')\n",
+    "    pulls = pull_spans[0].text[:-1].split('\\xa0')[0].strip()\n",
+    "\n",
+    "    models.append({\n",
+    "        \"name\": li.h2.text.strip(),\n",
+    "        \"description\": li.p.text.strip(),\n",
+    "        \"url\": f\"{base_url}{li.a['href']}\",\n",
+    "        \"params\": sizes,\n",
+    "        \"pulls\": pulls,\n",
+    "    })\n",
+    "\n",
+    "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
+    "    json.dump(models, file, indent=4, ensure_ascii=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: llama3\n",
+      "Model: phi3\n",
+      "Model: wizardlm2\n",
+      "Model: mistral\n",
+      "Model: gemma\n",
+      "Model: mixtral\n",
+      "Model: llama2\n",
+      "Model: codegemma\n",
+      "Model: command-r\n",
+      "Model: command-r-plus\n",
+      "Model: llava\n",
+      "Model: dbrx\n",
+      "Model: codellama\n",
+      "Model: qwen\n",
+      "Model: dolphin-mixtral\n",
+      "Model: llama2-uncensored\n",
+      "Model: deepseek-coder\n",
+      "Model: mistral-openorca\n",
+      "Model: nomic-embed-text\n",
+      "Model: dolphin-mistral\n",
+      "Model: phi\n",
+      "Model: orca-mini\n",
+      "Model: nous-hermes2\n",
+      "Model: zephyr\n",
+      "Model: llama2-chinese\n",
+      "Model: wizard-vicuna-uncensored\n",
+      "Model: starcoder2\n",
+      "Model: vicuna\n",
+      "Model: tinyllama\n",
+      "Model: openhermes\n",
+      "Model: starcoder\n",
+      "Model: openchat\n",
+      "Model: dolphin-llama3\n",
+      "Model: yi\n",
+      "Model: tinydolphin\n",
+      "Model: wizardcoder\n",
+      "Model: stable-code\n",
+      "Model: mxbai-embed-large\n",
+      "Model: neural-chat\n",
+      "Model: phind-codellama\n",
+      "Model: wizard-math\n",
+      "Model: starling-lm\n",
+      "Model: falcon\n",
+      "Model: dolphincoder\n",
+      "Model: nous-hermes\n",
+      "Model: orca2\n",
+      "Model: sqlcoder\n",
+      "Model: stablelm2\n",
+      "Model: dolphin-phi\n",
+      "Model: solar\n",
+      "Model: yarn-llama2\n",
+      "Model: deepseek-llm\n",
+      "Model: codeqwen\n",
+      "Model: bakllava\n",
+      "Model: all-minilm\n",
+      "Model: samantha-mistral\n",
+      "Model: llama3-gradient\n",
+      "Model: medllama2\n",
+      "Model: wizardlm-uncensored\n",
+      "Model: xwinlm\n",
+      "Model: nous-hermes2-mixtral\n",
+      "Model: stable-beluga\n",
+      "Model: wizardlm\n",
+      "Model: codeup\n",
+      "Model: yarn-mistral\n",
+      "Model: everythinglm\n",
+      "Model: meditron\n",
+      "Model: llama-pro\n",
+      "Model: magicoder\n",
+      "Model: stablelm-zephyr\n",
+      "Model: nexusraven\n",
+      "Model: codebooga\n",
+      "Model: mistrallite\n",
+      "Model: llama3-chatqa\n",
+      "Model: wizard-vicuna\n",
+      "Model: snowflake-arctic-embed\n",
+      "Model: llava-llama3\n",
+      "Model: goliath\n",
+      "Model: open-orca-platypus2\n",
+      "Model: moondream\n",
+      "Model: duckdb-nsql\n",
+      "Model: notux\n",
+      "Model: megadolphin\n",
+      "Model: notus\n",
+      "Model: alfred\n",
+      "Model: llava-phi3\n",
+      "Model: falcon2\n"
+     ]
+    }
+   ],
+   "source": [
+    "# For every entry in `models`, scrape its /tags page, attach the parsed tags under\n",
+    "# model[\"tags\"], then rewrite models.json. Relies on `models`, `base_url`, `get`,\n",
+    "# `BeautifulSoup` and `json` defined by the previous cell.\n",
+    "for model in models:\n",
+    "    tagsurl = f\"{model['url']}/tags\"\n",
+    "    # Bound the wait and stop on HTTP errors instead of parsing an error page\n",
+    "    response = get(tagsurl, timeout=30)\n",
+    "    response.raise_for_status()\n",
+    "    # Parse the HTML content with BeautifulSoup (own name, so the library-page\n",
+    "    # `soup` from the previous cell is not overwritten)\n",
+    "    tags_soup = BeautifulSoup(response.text, 'html.parser')\n",
+    "    # select links with the class group\n",
+    "    tags = tags_soup.select('a.group')\n",
+    "    print(f\"Model: {model['name']}\")\n",
+    "    model_tags = []\n",
+    "    for tag in tags:\n",
+    "        # The first div.items-baseline under the link's grandparent is split on\n",
+    "        # ' • ' into at most three fields, stored below as hash, size and updated\n",
+    "        details = tag.parent.parent.select('div.items-baseline')[0].text.strip().split(' • ', 2)\n",
+    "        details = [part.strip() for part in details]\n",
+    "        model_tags.append({\n",
+    "            \"name\": tag.text.strip(),\n",
+    "            \"url\": f\"{base_url}{tag['href']}\",\n",
+    "            \"size\": details[1],\n",
+    "            \"hash\": details[0],\n",
+    "            \"updated\": details[2],\n",
+    "        })\n",
+    "    model[\"tags\"] = model_tags\n",
+    "with open('models.json', 'w', encoding=\"utf-8\") as file:\n",
+    "    json.dump(models, file, indent=4, ensure_ascii=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}