Vrushali committed on
Commit
1dc30e5
·
1 Parent(s): 4459b32

Update requirements.txt, base.py, settings.py, llm_vision.py, product_description.py, and vectorsearch.py

Browse files
src/app/api/module/image.ipynb CHANGED
@@ -2,20 +2,9 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {},
7
- "outputs": [
8
- {
9
- "ename": "",
10
- "evalue": "",
11
- "output_type": "error",
12
- "traceback": [
13
- "\u001b[1;31mRunning cells with 'catlognew' requires the ipykernel package.\n",
14
- "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
15
- "\u001b[1;31mCommand: 'conda install -n catlognew ipykernel --update-deps --force-reinstall'"
16
- ]
17
- }
18
- ],
19
  "source": [
20
  "import cv2\n",
21
  "import os\n",
@@ -29,20 +18,9 @@
29
  },
30
  {
31
  "cell_type": "code",
32
- "execution_count": null,
33
  "metadata": {},
34
- "outputs": [
35
- {
36
- "ename": "",
37
- "evalue": "",
38
- "output_type": "error",
39
- "traceback": [
40
- "\u001b[1;31mFailed to start the Kernel. \n",
41
- "\u001b[1;31mUnable to start Kernel 'catlognew (Python)' due to a connection timeout. \n",
42
- "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
43
- ]
44
- }
45
- ],
46
  "source": [
47
  "image_path = r\"data/remove_flash.jpg\""
48
  ]
@@ -152,33 +130,16 @@
152
  "output_type": "error",
153
  "traceback": [
154
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
155
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
156
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
157
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/__init__.py:5\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mclient\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AdminClient \u001b[38;5;28;01mas\u001b[39;00m AdminClientCreator\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mauth\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtoken\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TokenTransportHeader\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
158
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/auth/token/__init__.py:26\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m System\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 27\u001b[0m OpenTelemetryGranularity,\n\u001b[1;32m 28\u001b[0m trace_method,\n\u001b[1;32m 29\u001b[0m )\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_class\n",
159
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py:5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, Optional, Sequence, Union\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m trace\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msdk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresources\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SERVICE_NAME, Resource\n",
160
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/trace/__init__.py:87\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeprecated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deprecated\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context \u001b[38;5;28;01mas\u001b[39;00m context_api\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mattributes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BoundedAttributes \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
161
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/context/__init__.py:25\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvironment_variables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OTEL_PYTHON_CONTEXT\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_importlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m entry_points\n\u001b[1;32m 27\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n",
162
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/util/_importlib_metadata.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright The OpenTelemetry Authors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# FIXME: Use importlib.metadata when support for 3.11 is dropped if the rest of\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# the supported versions at that time have the same API.\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mimportlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 18\u001b[0m EntryPoint,\n\u001b[1;32m 19\u001b[0m EntryPoints,\n\u001b[1;32m 20\u001b[0m entry_points,\n\u001b[1;32m 21\u001b[0m version,\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# The importlib-metadata library has introduced breaking changes before to its\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# API, this module is kept just to act as a layer between the\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# importlib-metadata library and our project if in any case it is necessary to\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# do so.\u001b[39;00m\n",
163
- "\u001b[0;31mImportError\u001b[0m: cannot import name 'EntryPoint' from 'importlib_metadata' (unknown location)",
164
  "\nDuring handling of the above exception, another exception occurred:\n",
165
  "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
166
  "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
167
  "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
168
- "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
169
  "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
170
  ]
171
- },
172
- {
173
- "ename": "",
174
- "evalue": "",
175
- "output_type": "error",
176
- "traceback": [
177
- "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
178
- "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
179
- "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
180
- "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
181
- ]
182
  }
183
  ],
184
  "source": [
@@ -212,7 +173,7 @@
212
  "name": "python",
213
  "nbconvert_exporter": "python",
214
  "pygments_lexer": "ipython3",
215
- "version": "3.10.0"
216
  }
217
  },
218
  "nbformat": 4,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import cv2\n",
10
  "import os\n",
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 2,
22
  "metadata": {},
23
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
24
  "source": [
25
  "image_path = r\"data/remove_flash.jpg\""
26
  ]
 
130
  "output_type": "error",
131
  "traceback": [
132
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
133
+ "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
134
+ "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
135
+ "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'chromadb'",
 
 
 
 
 
 
136
  "\nDuring handling of the above exception, another exception occurred:\n",
137
  "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
138
  "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
139
  "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
140
+ "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
141
  "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
142
  ]
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
  ],
145
  "source": [
 
173
  "name": "python",
174
  "nbconvert_exporter": "python",
175
  "pygments_lexer": "ipython3",
176
+ "version": "3.10.13"
177
  }
178
  },
179
  "nbformat": 4,
src/app/api/module/llm_vision.py CHANGED
@@ -1,6 +1,7 @@
1
  import base64
2
  import requests
3
  from config import OPENAI_API_KEY
 
4
  import os
5
 
6
 
@@ -55,4 +56,21 @@ class OpenAIVision:
55
  }
56
 
57
  response = requests.post(self.base_url, headers=headers, json=payload)
58
- return response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import base64
2
  import requests
3
  from config import OPENAI_API_KEY
4
+ from openai import OpenAI
5
  import os
6
 
7
 
 
56
  }
57
 
58
  response = requests.post(self.base_url, headers=headers, json=payload)
59
+ return response.json()
60
+
61
+
62
+ def getname(self , prompt):
63
+ client = OpenAI()
64
+ completion = client.chat.completions.create(
65
+ model="gpt-3.5-turbo",
66
+ messages=[
67
+ {"role": "user", "content": prompt}
68
+ ]
69
+ )
70
+
71
+ return completion.choices[0].message
72
+
73
+
74
+
75
+
76
+
src/app/api/module/product_description.py CHANGED
@@ -4,23 +4,39 @@ import matplotlib.pyplot as plt
4
  import numpy as np
5
  from llm_vision import OpenAIVision
6
  from ocr import azure_ocr
7
- from prompts.base import base_prompt
8
  from utils import extract_json_from_text
9
  from vectorsearch import search , get_detail_df
 
10
 
11
-
12
-
13
- def get_product_description(image_path):
14
- details = azure_ocr(image_path)
15
  prompt = base_prompt.format(text = details)
16
  obj = OpenAIVision()
17
- json = obj.get_image_description(image_path,prompt)
18
- response = extract_json_from_text(json['choices'][0]['message']['content'])
19
-
20
  return response
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def add_in_db(response):
23
- name = response['brand'] + " " + response['type_of_product']
24
- get_prod_name_db = search(name)
25
- name = get_detail_df(get_prod_name_db)
26
- ### Add things into database
 
 
 
4
  import numpy as np
5
  from llm_vision import OpenAIVision
6
  from ocr import azure_ocr
7
+ from prompts.base import base_prompt, gpt3
8
  from utils import extract_json_from_text
9
  from vectorsearch import search , get_detail_df
10
+ import json
11
 
12
+ def get_details(image_path , details): ### If product is not in database
 
 
 
13
  prompt = base_prompt.format(text = details)
14
  obj = OpenAIVision()
15
+ jsontext = obj.get_image_description(image_path,prompt)
16
+ response = extract_json_from_text(jsontext['choices'][0]['message']['content'])
17
+ ##add
18
  return response
19
 
20
+ def get_name(image_path): ### If product is in database
21
+ details = azure_ocr(image_path)
22
+ prompt = gpt3.format(text = details)
23
+ obj = OpenAIVision()
24
+ name = obj.getname(prompt)
25
+ jsontext = json.loads(name.content)
26
+ print(jsontext)
27
+ product_name = jsontext['product_name']
28
+ get_prod_name_db = search(product_name)
29
+ # if name not in db:
30
+ # response = get_details(image_path, details)
31
+ # add_in_db(response)
32
+ # else:
33
+ # add_in_db(get_prod_name_db)
34
+
35
+
36
  def add_in_db(response):
37
+ pass
38
+
39
+
40
+ if __name__ == "__main__":
41
+ image_path = r"data/remove_flash.jpg"
42
+ get_name(image_path)
src/app/api/module/prompts/base.py CHANGED
@@ -32,4 +32,13 @@ base_prompt = dedent("""
32
 
33
  Analyse data from the above product description to give me the following details in JSON format:
34
  Only return the output in the required json format.
35
- """)
 
 
 
 
 
 
 
 
 
 
32
 
33
  Analyse data from the above product description to give me the following details in JSON format:
34
  Only return the output in the required json format.
35
+ """)
36
+
37
+
38
+ gpt3 = dedent(""" I am providing you with a OCR text about a product.
39
+
40
+ OCR TEXT : {text}
41
+ I want you to provide me with the name of prodcut in following JSON format:
42
+ "product_name" : "BRU instant coffee".
43
+
44
+ """)
src/app/api/module/vectorsearch.py CHANGED
@@ -5,44 +5,63 @@ from langchain_openai import OpenAIEmbeddings
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
  import pandas as pd
8
-
 
9
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 
 
 
 
 
10
 
 
 
 
 
11
 
12
- # df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
13
- # df_new = pd.DataFrame(columns=["id", "name"])
14
- # df_new = df['name']
15
- # df_new.to_csv(r"data/data.csv", index=False)
16
 
17
- def create_vector():
18
- loader = CSVLoader(file_path="data/data.csv")
19
- docs = loader.load()
20
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
21
- documents = text_splitter.split_documents(docs)
22
- db_path = os.path.join(file_Directory,"vectorstore")
23
- embeddings = OpenAIEmbeddings()
24
- os.makedirs(db_path, exist_ok=True)
25
- Chroma.from_documents(docs, embeddings, persist_directory= db_path)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def search(query):
28
- embeddings = OpenAIEmbeddings()
29
- db_path = os.path.join(file_Directory,"vectorstore")
30
- db = Chroma(persist_directory= db_path, embedding_function= embeddings)
31
- embedding_vector = OpenAIEmbeddings().embed_query(query)
32
- docs = db.similarity_search_by_vector(embedding_vector)
33
- print(docs[0].page_content)
34
- return docs[0].page_content
35
 
36
 
37
  def get_detail_df(name):
38
- df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
39
- for item in df.iterrows():
40
- if item['name'] == name:
 
41
  return item
42
  else:
43
- return None
 
44
 
45
  if __name__ == "__main__":
46
- create_vector()
47
- name = search("Choco Creme Wafers")
48
- print(get_detail_df(name))
 
 
5
  from langchain.text_splitter import CharacterTextSplitter
6
  from langchain_community.vectorstores import Chroma
7
  import pandas as pd
8
+ import chromadb,uuid
9
+ from chromadb.utils import embedding_functions
10
  os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
11
+ db_path = os.path.join(file_Directory,"vectorstore")
12
+ client = chromadb.PersistentClient(path=db_path)
13
+
14
+ def generate_uuid():
15
+ return str(uuid.uuid4())
16
 
17
+
18
+ emmbedding_model = "text-embedding-3-large"
19
+ openai_ef = embedding_functions.OpenAIEmbeddingFunction(model_name=emmbedding_model,api_key=OPENAI_API_KEY)
20
+ collection = client.get_or_create_collection(name="products")
21
 
 
 
 
 
22
 
23
+ def add_document_chroma_collection(collection_object, document_list, embedding_list, metadata):
24
+ metadata_list = [metadata for i in range(len(document_list))]
25
+ ids_gen = [generate_uuid() for i in range(len(document_list))]
26
+ collection_object.add(embeddings = embedding_list,documents = document_list,metadatas = metadata_list , ids = ids_gen)
27
+ if collection_object:
28
+ return True
 
 
 
29
 
30
+
31
+ def create_vector():
32
+ df = pd.read_csv(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/data.csv")
33
+ for i , items in df.iterrows():
34
+ print(items['name'])
35
+ metadata = {"empty":""}
36
+ doc_embed = openai_ef([items['name']])
37
+ add_document_chroma_collection(collection_object = collection, document_list = [items["name"]], embedding_list = doc_embed ,metadata = metadata)
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
  def search(query):
46
+ embbed_text_search = openai_ef(query)
47
+ data = collection.query(query_embeddings = embbed_text_search, n_results=10)
48
+ return data
49
+
50
+
 
 
51
 
52
 
53
  def get_detail_df(name):
54
+ print(name)
55
+ df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/Catalog/Data_Images/ONDCSampleData.xlsx")
56
+ for i,item in df.iterrows():
57
+ if str(item['name']) == str(name).split(":")[1].strip():
58
  return item
59
  else:
60
+ continue
61
+
62
 
63
  if __name__ == "__main__":
64
+ # create_vector()
65
+ name = search("Atta")
66
+ print(name)
67
+ # # # print(get_detail_df(name))
src/app/main/settings.py CHANGED
@@ -78,12 +78,17 @@ WSGI_APPLICATION = 'main.wsgi.application'
78
 
79
  DATABASES = {
80
  'default': {
81
- 'ENGINE': 'django.db.backends.sqlite3',
82
- 'NAME': BASE_DIR / 'db.sqlite3',
 
 
 
 
 
 
83
  }
84
  }
85
 
86
-
87
  # Password validation
88
  # https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
89
 
 
78
 
79
  DATABASES = {
80
  'default': {
81
+ 'ENGINE': 'django.db.backends.mysql',
82
+ # 'ENGINE': 'mysql.connector.django',
83
+ 'NAME': 'test2',
84
+ 'USER': 'cosmosgcp',
85
+ 'PASSWORD': '$Bonsai999',
86
+ 'HOST': '34.122.223.224',
87
+ 'PORT': '3306',
88
+ 'OPTIONS': {'charset': 'utf8mb4','auth_plugin': 'mysql_native_password'},
89
  }
90
  }
91
 
 
92
  # Password validation
93
  # https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
94
 
src/requirements.txt CHANGED
@@ -1,10 +1,8 @@
1
- gradio==4.17.0
2
  langchain==0.1.6
3
  python-decouple==3.4
4
  pandas
5
  azure-ai-formrecognizer
6
  easyocr
7
- langchain
8
  chromadb
9
  langchain_openai
10
  unstructured
 
 
1
  langchain==0.1.6
2
  python-decouple==3.4
3
  pandas
4
  azure-ai-formrecognizer
5
  easyocr
 
6
  chromadb
7
  langchain_openai
8
  unstructured