Update requirements.txt, base.py, settings.py, llm_vision.py, product_description.py, and vectorsearch.py
- src/app/api/module/image.ipynb +9 -48
- src/app/api/module/llm_vision.py +19 -1
- src/app/api/module/product_description.py +28 -12
- src/app/api/module/prompts/base.py +10 -1
- src/app/api/module/vectorsearch.py +47 -28
- src/app/main/settings.py +8 -3
- src/requirements.txt +0 -2
src/app/api/module/image.ipynb
CHANGED
@@ -2,20 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mRunning cells with 'catlognew' requires the ipykernel package.\n",
-      "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
-      "\u001b[1;31mCommand: 'conda install -n catlognew ipykernel --update-deps --force-reinstall'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import cv2\n",
     "import os\n",
@@ -29,20 +18,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mFailed to start the Kernel. \n",
-      "\u001b[1;31mUnable to start Kernel 'catlognew (Python)' due to a connection timeout. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "image_path = r\"data/remove_flash.jpg\""
    ]
@@ -152,33 +130,16 @@
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-     "\u001b[0;
-     "File \u001b[0;32m~/miniconda3/envs/
-     "
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/auth/token/__init__.py:26\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m System\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 27\u001b[0m OpenTelemetryGranularity,\n\u001b[1;32m 28\u001b[0m trace_method,\n\u001b[1;32m 29\u001b[0m )\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_class\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py:5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, Optional, Sequence, Union\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m trace\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msdk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresources\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SERVICE_NAME, Resource\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/trace/__init__.py:87\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeprecated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deprecated\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context \u001b[38;5;28;01mas\u001b[39;00m context_api\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mattributes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BoundedAttributes \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/context/__init__.py:25\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvironment_variables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OTEL_PYTHON_CONTEXT\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_importlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m entry_points\n\u001b[1;32m 27\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/util/_importlib_metadata.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright The OpenTelemetry Authors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# FIXME: Use importlib.metadata when support for 3.11 is dropped if the rest of\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# the supported versions at that time have the same API.\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mimportlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 18\u001b[0m EntryPoint,\n\u001b[1;32m 19\u001b[0m EntryPoints,\n\u001b[1;32m 20\u001b[0m entry_points,\n\u001b[1;32m 21\u001b[0m version,\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# The importlib-metadata library has introduced breaking changes before to its\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# API, this module is kept just to act as a layer between the\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# importlib-metadata library and our project if in any case it is necessary to\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# do so.\u001b[39;00m\n",
-     "\u001b[0;31mImportError\u001b[0m: cannot import name 'EntryPoint' from 'importlib_metadata' (unknown location)",
+     "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+     "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
+     "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'chromadb'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
-     "File \u001b[0;32m~/miniconda3/envs/
+     "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
      "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
     ]
-    },
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
-      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
-      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
    }
   ],
   "source": [
@@ -212,7 +173,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,
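The notebook change above only bumps the execution counts and clears the stale kernel/ImportError outputs. As a side note, a minimal sketch of doing the same output cleanup programmatically with the nbformat library (the path is the notebook from this commit; resetting counts to None rather than 1 and 2 is a simplification):

import nbformat

def clear_outputs(path):
    nb = nbformat.read(path, as_version=4)   # load the notebook as nbformat v4
    for cell in nb.cells:
        if cell.cell_type == "code":
            cell.outputs = []                # drop stored tracebacks/outputs
            cell.execution_count = None      # reset the execution counter
    nbformat.write(nb, path)

clear_outputs("src/app/api/module/image.ipynb")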
src/app/api/module/llm_vision.py
CHANGED
@@ -1,6 +1,7 @@
 import base64
 import requests
 from config import OPENAI_API_KEY
+from openai import OpenAI
 import os


@@ -55,4 +56,21 @@ class OpenAIVision:
         }

         response = requests.post(self.base_url, headers=headers, json=payload)
-        return response.json()
+        return response.json()
+
+
+    def getname(self , prompt):
+        client = OpenAI()
+        completion = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": prompt}
+            ]
+        )
+
+        return completion.choices[0].message
+
+
+
+
+
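The new getname helper wraps a single chat-completion call. For reference, a minimal sketch of exercising it on its own; it assumes OPENAI_API_KEY is already available to the OpenAI client, that config.py resolves, and the prompt string below is made up:

# Illustrative only; not part of this commit.
from llm_vision import OpenAIVision

vision = OpenAIVision()
# gpt3-style prompt asking for a JSON object with a single product_name key
message = vision.getname('Return {"product_name": "..."} for this OCR text: BRU INSTANT COFFEE 50g')
print(message.content)  # raw JSON string; product_description.get_name parses it with json.loads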
src/app/api/module/product_description.py
CHANGED
@@ -4,23 +4,39 @@ import matplotlib.pyplot as plt
 import numpy as np
 from llm_vision import OpenAIVision
 from ocr import azure_ocr
-from prompts.base import base_prompt
+from prompts.base import base_prompt, gpt3
 from utils import extract_json_from_text
 from vectorsearch import search , get_detail_df
+import json

-
-
-def get_product_description(image_path):
-    details = azure_ocr(image_path)
+def get_details(image_path , details): ### If product is not in database
     prompt = base_prompt.format(text = details)
     obj = OpenAIVision()
-
-    response = extract_json_from_text(
-
+    jsontext = obj.get_image_description(image_path,prompt)
+    response = extract_json_from_text(jsontext['choices'][0]['message']['content'])
+    ##add
     return response

+def get_name(image_path): ### If product is in database
+    details = azure_ocr(image_path)
+    prompt = gpt3.format(text = details)
+    obj = OpenAIVision()
+    name = obj.getname(prompt)
+    jsontext = json.loads(name.content)
+    print(jsontext)
+    product_name = jsontext['product_name']
+    get_prod_name_db = search(product_name)
+    # if name not in db:
+    #     response = get_details(image_path, details)
+    #     add_in_db(response)
+    # else:
+    #     add_in_db(get_prod_name_db)
+
+
 def add_in_db(response):
-
-
-
-
+    pass
+
+
+if __name__ == "__main__":
+    image_path = r"data/remove_flash.jpg"
+    get_name(image_path)
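The database branch inside get_name is still commented out. A hedged sketch of how it might be wired once the Chroma query result is used to decide between the two paths; the found flag and the fallback ordering are assumptions, not code from this commit, and it reuses the module's own imports (azure_ocr, gpt3, OpenAIVision, json, search, get_details, add_in_db):

# Hypothetical glue, not part of this commit: prefer a catalogue hit, fall back to vision extraction.
def resolve_product(image_path):
    details = azure_ocr(image_path)
    name = OpenAIVision().getname(gpt3.format(text=details))
    product_name = json.loads(name.content)['product_name']
    hit = search(product_name)                               # Chroma query result: dict of lists
    found = bool(hit and hit.get('documents') and hit['documents'][0])
    if not found:
        response = get_details(image_path, details)          # full vision + base_prompt extraction
        add_in_db(response)
        return response
    return hit['documents'][0][0]                            # closest catalogue entry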
src/app/api/module/prompts/base.py
CHANGED
@@ -32,4 +32,13 @@ base_prompt = dedent("""

    Analyse data from the above product description to give me the following details in JSON format:
    Only return the output in the required json format.
-""")
+""")
+
+
+gpt3 = dedent(""" I am providing you with a OCR text about a product.
+
+    OCR TEXT : {text}
+    I want you to provide me with the name of prodcut in following JSON format:
+    "product_name" : "BRU instant coffee".
+
+""")
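The new gpt3 template has {text} as its only placeholder, so str.format fills it directly. A small sketch of the intended round trip; the OCR string and the model reply shown here are made up:

import json
from prompts.base import gpt3   # assumes the module directory is on the import path

prompt = gpt3.format(text="BRU INSTANT COFFEE 50g MRP Rs.160")
# The model is instructed to answer with a single JSON object, e.g.:
reply = '{"product_name": "BRU instant coffee"}'
print(json.loads(reply)["product_name"])   # -> BRU instant coffee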
src/app/api/module/vectorsearch.py
CHANGED
@@ -5,44 +5,63 @@ from langchain_openai import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 import pandas as pd
-
+import chromadb,uuid
+from chromadb.utils import embedding_functions
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+db_path = os.path.join(file_Directory,"vectorstore")
+client = chromadb.PersistentClient(path=db_path)
+
+def generate_uuid():
+    return str(uuid.uuid4())

+
+emmbedding_model = "text-embedding-3-large"
+openai_ef = embedding_functions.OpenAIEmbeddingFunction(model_name=emmbedding_model,api_key=OPENAI_API_KEY)
+collection = client.get_or_create_collection(name="products")

-# df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
-# df_new = pd.DataFrame(columns=["id", "name"])
-# df_new = df['name']
-# df_new.to_csv(r"data/data.csv", index=False)

-def
-
-
-
-
-
-    embeddings = OpenAIEmbeddings()
-    os.makedirs(db_path, exist_ok=True)
-    Chroma.from_documents(docs, embeddings, persist_directory= db_path)
+def add_document_chroma_collection(collection_object, document_list, embedding_list, metadata):
+    metadata_list = [metadata for i in range(len(document_list))]
+    ids_gen = [generate_uuid() for i in range(len(document_list))]
+    collection_object.add(embeddings = embedding_list,documents = document_list,metadatas = metadata_list , ids = ids_gen)
+    if collection_object:
+        return True

+
+def create_vector():
+    df = pd.read_csv(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/data.csv")
+    for i , items in df.iterrows():
+        print(items['name'])
+        metadata = {"empty":""}
+        doc_embed = openai_ef([items['name']])
+        add_document_chroma_collection(collection_object = collection, document_list = [items["name"]], embedding_list = doc_embed ,metadata = metadata)
+
+
+
+
+
+
+
 def search(query):
-
-
-
-
-
-    print(docs[0].page_content)
-    return docs[0].page_content
+    embbed_text_search = openai_ef(query)
+    data = collection.query(query_embeddings = embbed_text_search, n_results=10)
+    return data
+
+


 def get_detail_df(name):
-
-
-
+    print(name)
+    df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/Catalog/Data_Images/ONDCSampleData.xlsx")
+    for i,item in df.iterrows():
+        if str(item['name']) == str(name).split(":")[1].strip():
             return item
         else:
-
+            continue
+

 if __name__ == "__main__":
-    create_vector()
-    name = search("
-    print(
+    # create_vector()
+    name = search("Atta")
+    print(name)
+    # # # print(get_detail_df(name))
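vectorsearch.py now talks to Chroma directly through a persistent client plus an OpenAI embedding function instead of the LangChain Chroma wrapper. A standalone sketch of the same pattern; the path, collection name and sample documents below are placeholders, not values from the repo:

import os, uuid
import chromadb
from chromadb.utils import embedding_functions

client = chromadb.PersistentClient(path="./vectorstore_demo")        # placeholder path
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-3-large",
    api_key=os.environ["OPENAI_API_KEY"],
)
collection = client.get_or_create_collection(name="products_demo")   # placeholder name

names = ["BRU instant coffee", "Aashirvaad Atta 5kg"]                 # made-up catalogue rows
collection.add(
    ids=[str(uuid.uuid4()) for _ in names],
    documents=names,
    embeddings=openai_ef(names),      # embed once, store vectors alongside the raw names
)

hits = collection.query(query_embeddings=openai_ef(["Atta"]), n_results=2)
print(hits["documents"][0])           # nearest catalogue names for the first query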
src/app/main/settings.py
CHANGED
@@ -78,12 +78,17 @@ WSGI_APPLICATION = 'main.wsgi.application'

 DATABASES = {
     'default': {
-        'ENGINE': 'django.db.backends.
-        '
+        'ENGINE': 'django.db.backends.mysql',
+        # 'ENGINE': 'mysql.connector.django',
+        'NAME': 'test2',
+        'USER': 'cosmosgcp',
+        'PASSWORD': '$Bonsai999',
+        'HOST': '34.122.223.224',
+        'PORT': '3306',
+        'OPTIONS': {'charset': 'utf8mb4','auth_plugin': 'mysql_native_password'},
     }
 }

-
 # Password validation
 # https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
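The new DATABASES block hardcodes live credentials in settings.py. A hedged alternative shape for the same block that reads them from the environment instead (the DB_* variable names are illustrative, not from the repo; django.db.backends.mysql still needs a MySQL driver such as mysqlclient installed):

import os

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': os.environ.get('DB_NAME', 'test2'),
        'USER': os.environ.get('DB_USER', ''),
        'PASSWORD': os.environ.get('DB_PASSWORD', ''),
        'HOST': os.environ.get('DB_HOST', '127.0.0.1'),
        'PORT': os.environ.get('DB_PORT', '3306'),
        'OPTIONS': {'charset': 'utf8mb4', 'auth_plugin': 'mysql_native_password'},
    }
}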
|
src/requirements.txt
CHANGED
@@ -1,10 +1,8 @@
-gradio==4.17.0
 langchain==0.1.6
 python-decouple==3.4
 pandas
 azure-ai-formrecognizer
 easyocr
-langchain
 chromadb
 langchain_openai
 unstructured