Update requirements.txt, base.py, settings.py, llm_vision.py, product_description.py, and vectorsearch.py
- src/app/api/module/image.ipynb +9 -48
- src/app/api/module/llm_vision.py +19 -1
- src/app/api/module/product_description.py +28 -12
- src/app/api/module/prompts/base.py +10 -1
- src/app/api/module/vectorsearch.py +47 -28
- src/app/main/settings.py +8 -3
- src/requirements.txt +0 -2
src/app/api/module/image.ipynb
CHANGED
@@ -2,20 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mRunning cells with 'catlognew' requires the ipykernel package.\n",
-      "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
-      "\u001b[1;31mCommand: 'conda install -n catlognew ipykernel --update-deps --force-reinstall'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import cv2\n",
     "import os\n",
@@ -29,20 +18,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mFailed to start the Kernel. \n",
-      "\u001b[1;31mUnable to start Kernel 'catlognew (Python)' due to a connection timeout. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "image_path = r\"data/remove_flash.jpg\""
    ]
@@ -152,33 +130,16 @@
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-     "\u001b[0;
-     "File \u001b[0;32m~/miniconda3/envs/
-     "
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/auth/token/__init__.py:26\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m System\n\u001b[0;32m---> 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 27\u001b[0m OpenTelemetryGranularity,\n\u001b[1;32m 28\u001b[0m trace_method,\n\u001b[1;32m 29\u001b[0m )\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_class\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/chromadb/telemetry/opentelemetry/__init__.py:5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, Optional, Sequence, Union\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m trace\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msdk\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresources\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SERVICE_NAME, Resource\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/trace/__init__.py:87\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeprecated\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m deprecated\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m context \u001b[38;5;28;01mas\u001b[39;00m context_api\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mattributes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BoundedAttributes \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/context/__init__.py:25\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01menvironment_variables\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OTEL_PYTHON_CONTEXT\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopentelemetry\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_importlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m entry_points\n\u001b[1;32m 27\u001b[0m logger \u001b[38;5;241m=\u001b[39m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;18m__name__\u001b[39m)\n",
-     "File \u001b[0;32m~/miniconda3/envs/catlog/lib/python3.10/site-packages/opentelemetry/util/_importlib_metadata.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright The OpenTelemetry Authors\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Licensed under the Apache License, Version 2.0 (the \"License\");\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# FIXME: Use importlib.metadata when support for 3.11 is dropped if the rest of\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# the supported versions at that time have the same API.\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mimportlib_metadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 18\u001b[0m EntryPoint,\n\u001b[1;32m 19\u001b[0m EntryPoints,\n\u001b[1;32m 20\u001b[0m entry_points,\n\u001b[1;32m 21\u001b[0m version,\n\u001b[1;32m 22\u001b[0m )\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# The importlib-metadata library has introduced breaking changes before to its\u001b[39;00m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# API, this module is kept just to act as a layer between the\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# importlib-metadata library and our project if in any case it is necessary to\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# do so.\u001b[39;00m\n",
-     "\u001b[0;31mImportError\u001b[0m: cannot import name 'EntryPoint' from 'importlib_metadata' (unknown location)",
+     "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
+     "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:81\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n",
+     "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'chromadb'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[10], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m name \u001b[38;5;241m=\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbrand\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m response[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype_of_product\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 2\u001b[0m name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBRU\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m get_prod_name_db \u001b[38;5;241m=\u001b[39m \u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/Catalog-Digitization-/src/app/api/module/vectorsearch.py:30\u001b[0m, in \u001b[0;36msearch\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 28\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\n\u001b[1;32m 29\u001b[0m db_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(file_Directory,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvectorstore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 30\u001b[0m db \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdb_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43membeddings\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m embedding_vector \u001b[38;5;241m=\u001b[39m OpenAIEmbeddings()\u001b[38;5;241m.\u001b[39membed_query(query)\n\u001b[1;32m 32\u001b[0m docs \u001b[38;5;241m=\u001b[39m db\u001b[38;5;241m.\u001b[39msimilarity_search_by_vector(embedding_vector)\n",
-     "File \u001b[0;32m~/miniconda3/envs/
+     "File \u001b[0;32m~/miniconda3/envs/catlognew/lib/python3.10/site-packages/langchain_community/vectorstores/chroma.py:84\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[0;32m---> 84\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not import chromadb python package. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease install it with `pip install chromadb`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 87\u001b[0m )\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m client \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_client_settings \u001b[38;5;241m=\u001b[39m client_settings\n",
      "\u001b[0;31mImportError\u001b[0m: Could not import chromadb python package. Please install it with `pip install chromadb`."
     ]
-    },
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
-      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
-      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
    }
   ],
   "source": [
@@ -212,7 +173,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,
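The notebook change above only bumps the execution counts and clears the stale kernel/ImportError outputs. As a side note, a minimal sketch of doing the same output cleanup programmatically with the nbformat library (the path is the notebook from this commit; resetting counts to None rather than 1 and 2 is a simplification):

import nbformat

def clear_outputs(path):
    nb = nbformat.read(path, as_version=4)   # load the notebook as nbformat v4
    for cell in nb.cells:
        if cell.cell_type == "code":
            cell.outputs = []                # drop stored tracebacks/outputs
            cell.execution_count = None      # reset the execution counter
    nbformat.write(nb, path)

clear_outputs("src/app/api/module/image.ipynb")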
src/app/api/module/llm_vision.py
CHANGED
@@ -1,6 +1,7 @@
 import base64
 import requests
 from config import OPENAI_API_KEY
+from openai import OpenAI
 import os


@@ -55,4 +56,21 @@ class OpenAIVision:
         }

         response = requests.post(self.base_url, headers=headers, json=payload)
-        return response.json()
+        return response.json()
+
+
+    def getname(self , prompt):
+        client = OpenAI()
+        completion = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "user", "content": prompt}
+            ]
+        )
+
+        return completion.choices[0].message
+
+
+
+
+
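The new getname helper wraps a single chat-completion call. For reference, a minimal sketch of exercising it on its own; it assumes OPENAI_API_KEY is already available to the OpenAI client, that config.py resolves, and the prompt string below is made up:

# Illustrative only; not part of this commit.
from llm_vision import OpenAIVision

vision = OpenAIVision()
# gpt3-style prompt asking for a JSON object with a single product_name key
message = vision.getname('Return {"product_name": "..."} for this OCR text: BRU INSTANT COFFEE 50g')
print(message.content)  # raw JSON string; product_description.get_name parses it with json.loads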
src/app/api/module/product_description.py
CHANGED
@@ -4,23 +4,39 @@ import matplotlib.pyplot as plt
 import numpy as np
 from llm_vision import OpenAIVision
 from ocr import azure_ocr
-from prompts.base import base_prompt
+from prompts.base import base_prompt, gpt3
 from utils import extract_json_from_text
 from vectorsearch import search , get_detail_df
+import json

-
-
-def get_product_description(image_path):
-    details = azure_ocr(image_path)
+def get_details(image_path , details): ### If product is not in database
     prompt = base_prompt.format(text = details)
     obj = OpenAIVision()
-
-    response = extract_json_from_text(
-
+    jsontext = obj.get_image_description(image_path,prompt)
+    response = extract_json_from_text(jsontext['choices'][0]['message']['content'])
+    ##add
     return response

+def get_name(image_path): ### If product is in database
+    details = azure_ocr(image_path)
+    prompt = gpt3.format(text = details)
+    obj = OpenAIVision()
+    name = obj.getname(prompt)
+    jsontext = json.loads(name.content)
+    print(jsontext)
+    product_name = jsontext['product_name']
+    get_prod_name_db = search(product_name)
+    # if name not in db:
+    #     response = get_details(image_path, details)
+    #     add_in_db(response)
+    # else:
+    #     add_in_db(get_prod_name_db)
+
+
 def add_in_db(response):
-
-
-
-
+    pass
+
+
+if __name__ == "__main__":
+    image_path = r"data/remove_flash.jpg"
+    get_name(image_path)
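The database branch inside get_name is still commented out. A hedged sketch of how it might be wired once the Chroma query result is used to decide between the two paths; the found flag and the fallback ordering are assumptions, not code from this commit, and it reuses the module's own imports (azure_ocr, gpt3, OpenAIVision, json, search, get_details, add_in_db):

# Hypothetical glue, not part of this commit: prefer a catalogue hit, fall back to vision extraction.
def resolve_product(image_path):
    details = azure_ocr(image_path)
    name = OpenAIVision().getname(gpt3.format(text=details))
    product_name = json.loads(name.content)['product_name']
    hit = search(product_name)                               # Chroma query result: dict of lists
    found = bool(hit and hit.get('documents') and hit['documents'][0])
    if not found:
        response = get_details(image_path, details)          # full vision + base_prompt extraction
        add_in_db(response)
        return response
    return hit['documents'][0][0]                            # closest catalogue entry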
src/app/api/module/prompts/base.py
CHANGED
@@ -32,4 +32,13 @@ base_prompt = dedent("""

    Analyse data from the above product description to give me the following details in JSON format:
    Only return the output in the required json format.
-""")
+""")
+
+
+gpt3 = dedent(""" I am providing you with a OCR text about a product.
+
+    OCR TEXT : {text}
+    I want you to provide me with the name of prodcut in following JSON format:
+    "product_name" : "BRU instant coffee".
+
+""")
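The new gpt3 template has {text} as its only placeholder, so str.format fills it directly. A small sketch of the intended round trip; the OCR string and the model reply shown here are made up:

import json
from prompts.base import gpt3   # assumes the module directory is on the import path

prompt = gpt3.format(text="BRU INSTANT COFFEE 50g MRP Rs.160")
# The model is instructed to answer with a single JSON object, e.g.:
reply = '{"product_name": "BRU instant coffee"}'
print(json.loads(reply)["product_name"])   # -> BRU instant coffee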
src/app/api/module/vectorsearch.py
CHANGED
@@ -5,44 +5,63 @@ from langchain_openai import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 import pandas as pd
-
+import chromadb,uuid
+from chromadb.utils import embedding_functions
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
+db_path = os.path.join(file_Directory,"vectorstore")
+client = chromadb.PersistentClient(path=db_path)
+
+def generate_uuid():
+    return str(uuid.uuid4())

+
+emmbedding_model = "text-embedding-3-large"
+openai_ef = embedding_functions.OpenAIEmbeddingFunction(model_name=emmbedding_model,api_key=OPENAI_API_KEY)
+collection = client.get_or_create_collection(name="products")

-# df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
-# df_new = pd.DataFrame(columns=["id", "name"])
-# df_new = df['name']
-# df_new.to_csv(r"data/data.csv", index=False)

-def
-
-
-
-
-
-    embeddings = OpenAIEmbeddings()
-    os.makedirs(db_path, exist_ok=True)
-    Chroma.from_documents(docs, embeddings, persist_directory= db_path)
+def add_document_chroma_collection(collection_object, document_list, embedding_list, metadata):
+    metadata_list = [metadata for i in range(len(document_list))]
+    ids_gen = [generate_uuid() for i in range(len(document_list))]
+    collection_object.add(embeddings = embedding_list,documents = document_list,metadatas = metadata_list , ids = ids_gen)
+    if collection_object:
+        return True

+
+def create_vector():
+    df = pd.read_csv(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/data.csv")
+    for i , items in df.iterrows():
+        print(items['name'])
+        metadata = {"empty":""}
+        doc_embed = openai_ef([items['name']])
+        add_document_chroma_collection(collection_object = collection, document_list = [items["name"]], embedding_list = doc_embed ,metadata = metadata)
+
+
+
+
+
+
+
 def search(query):
-
-
-
-
-
-    print(docs[0].page_content)
-    return docs[0].page_content
+    embbed_text_search = openai_ef(query)
+    data = collection.query(query_embeddings = embbed_text_search, n_results=10)
+    return data
+
+


 def get_detail_df(name):
-
-
-
+    print(name)
+    df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/Catalog/Data_Images/ONDCSampleData.xlsx")
+    for i,item in df.iterrows():
+        if str(item['name']) == str(name).split(":")[1].strip():
             return item
         else:
-
+            continue
+

 if __name__ == "__main__":
-    create_vector()
-    name = search("
-    print(
+    # create_vector()
+    name = search("Atta")
+    print(name)
+    # # # print(get_detail_df(name))
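vectorsearch.py now talks to Chroma directly through a persistent client plus an OpenAI embedding function instead of the LangChain Chroma wrapper. A standalone sketch of the same pattern; the path, collection name and sample documents below are placeholders, not values from the repo:

import os, uuid
import chromadb
from chromadb.utils import embedding_functions

client = chromadb.PersistentClient(path="./vectorstore_demo")        # placeholder path
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    model_name="text-embedding-3-large",
    api_key=os.environ["OPENAI_API_KEY"],
)
collection = client.get_or_create_collection(name="products_demo")   # placeholder name

names = ["BRU instant coffee", "Aashirvaad Atta 5kg"]                 # made-up catalogue rows
collection.add(
    ids=[str(uuid.uuid4()) for _ in names],
    documents=names,
    embeddings=openai_ef(names),      # embed once, store vectors alongside the raw names
)

hits = collection.query(query_embeddings=openai_ef(["Atta"]), n_results=2)
print(hits["documents"][0])           # nearest catalogue names for the first query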
src/app/main/settings.py
CHANGED
@@ -78,12 +78,17 @@ WSGI_APPLICATION = 'main.wsgi.application'

 DATABASES = {
     'default': {
-        'ENGINE': 'django.db.backends.
-        '
+        'ENGINE': 'django.db.backends.mysql',
+        # 'ENGINE': 'mysql.connector.django',
+        'NAME': 'test2',
+        'USER': 'cosmosgcp',
+        'PASSWORD': '$Bonsai999',
+        'HOST': '34.122.223.224',
+        'PORT': '3306',
+        'OPTIONS': {'charset': 'utf8mb4','auth_plugin': 'mysql_native_password'},
     }
 }

-
 # Password validation
 # https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators
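The new DATABASES block hardcodes live credentials in settings.py. A hedged alternative shape for the same block that reads them from the environment instead (the DB_* variable names are illustrative, not from the repo; django.db.backends.mysql still needs a MySQL driver such as mysqlclient installed):

import os

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': os.environ.get('DB_NAME', 'test2'),
        'USER': os.environ.get('DB_USER', ''),
        'PASSWORD': os.environ.get('DB_PASSWORD', ''),
        'HOST': os.environ.get('DB_HOST', '127.0.0.1'),
        'PORT': os.environ.get('DB_PORT', '3306'),
        'OPTIONS': {'charset': 'utf8mb4', 'auth_plugin': 'mysql_native_password'},
    }
}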
|
src/requirements.txt
CHANGED
@@ -1,10 +1,8 @@
-gradio==4.17.0
 langchain==0.1.6
 python-decouple==3.4
 pandas
 azure-ai-formrecognizer
 easyocr
-langchain
 chromadb
 langchain_openai
 unstructured