Update .env.example and requirements.txt, delete unused files
- src/.env.example +1 -0
- src/app/api/models.py +27 -3
- src/module/audio_text.py +0 -49
- src/module/image.ipynb +0 -44
- src/module/image_enhance.py +0 -111
- src/module/llm_vision.py +0 -59
- src/module/prompts/base.py +0 -34
- src/module/vectorsearch.py +0 -38
- src/requirements.txt +5 -1
src/.env.example
CHANGED
```diff
@@ -1 +1,2 @@
 OPENAI_API_KEY=""
+AZURE = ""
```
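Both keys are consumed through the project's `config` module (the deleted files below import from it). A minimal sketch of how they might be read with python-decouple, which requirements.txt already pins; the empty-string default for `AZURE` is an assumption, and decouple strips the whitespace and quotes around `= ""`:

```python
# config.py: a hedged sketch using python-decouple (python-decouple==3.4 is in requirements.txt)
from decouple import config

OPENAI_API_KEY = config("OPENAI_API_KEY")  # required: raises UndefinedValueError when unset
AZURE = config("AZURE", default="")        # optional: assumed to default to an empty string
```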
src/app/api/models.py
CHANGED
```diff
@@ -4,7 +4,7 @@ class Product(models.Model):
     barcode = models.CharField(max_length=20)
     brand = models.CharField(max_length=100)
     sub_brand = models.CharField(max_length=100, blank=True, null=True)
-
+    manufactured_by = models.CharField(max_length=200)
     product_name = models.CharField(max_length=200)
     weight = models.FloatField()
     variant = models.CharField(max_length=100, blank=True, null=True)
@@ -14,6 +14,30 @@ class Product(models.Model):
     child_category = models.CharField(max_length=100)
     sub_child_category = models.CharField(max_length=100, blank=True, null=True)
     images_paths = models.CharField(max_length=3000, blank=True, null=True) # Comma separated paths
+    description = models.TextField(max_length=3000, blank=True, null=True)
+    quantity = models.IntegerField(null=True, blank=True)
+    mrp = models.CharField(max_length=100, blank=True, null=True)
 
-    def __str__(self):
-        return self.product_name
+    def _str_(self):
+        return self.product_name
+
+
+class Database(models.Model):
+    barcode = models.CharField(max_length=20)
+    brand = models.CharField(max_length=100)
+    sub_brand = models.CharField(max_length=100, blank=True, null=True)
+    manufactured_by = models.CharField(max_length=200)
+    product_name = models.CharField(max_length=200)
+    weight = models.FloatField()
+    variant = models.CharField(max_length=100, blank=True, null=True)
+    net_content = models.CharField(max_length=100, blank=True, null=True)
+    price = models.DecimalField(max_digits=10, decimal_places=2)
+    parent_category = models.CharField(max_length=100)
+    child_category = models.CharField(max_length=100)
+    sub_child_category = models.CharField(max_length=100, blank=True, null=True)
+    images_paths = models.CharField(max_length=3000, blank=True, null=True) # Comma separated paths
+    description = models.TextField(max_length=3000, blank=True, null=True)
+    quantity = models.IntegerField(null=True, blank=True)
+    promotion_on_the_pack = models.CharField(max_length=100, blank=True, null=True)
+    type_of_packaging = models.CharField(max_length=100, blank=True, null=True)
+    mrp = models.CharField(max_length=100, blank=True, null=True)
```
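Two details in the new models deserve a note. The re-added string hook is spelled `_str_` (single underscores), which Django never calls, so these objects will render as `Product object (1)`; also, `Database` repeats nearly every `Product` field, and `max_length` on a `TextField` only constrains the form widget, not the column. A hedged sketch of how an abstract base could address both; `CatalogItem` is a hypothetical name, not something in this repo:

```python
from django.db import models


class CatalogItem(models.Model):
    # Shared columns declared once; Product and Database inherit them.
    barcode = models.CharField(max_length=20)
    brand = models.CharField(max_length=100)
    product_name = models.CharField(max_length=200)

    class Meta:
        abstract = True  # no table is created for the base class itself

    def __str__(self):  # double underscores, so Django actually uses it
        return self.product_name


class Product(CatalogItem):
    mrp = models.CharField(max_length=100, blank=True, null=True)


class Database(CatalogItem):
    price = models.DecimalField(max_digits=10, decimal_places=2)
```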
src/module/audio_text.py
DELETED
```diff
@@ -1,49 +0,0 @@
-# from whisper_jax import FlaxWhisperPipline
-# import jax.numpy as jnp
-import whisper
-print(whisper.__file__)
-from openai import OpenAI
-from module.config import OPENAI_API_KEY
-import os
-
-client = OpenAI()
-os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
-
-
-def whisper_pipeline_tpu(audio):
-    pipeline = FlaxWhisperPipline("openai/whisper-large-v3", dtype=jnp.bfloat16, batch_size=16)
-    text = pipeline(audio)
-    return text
-
-
-
-def whisper_pipeline(audio_path):
-    model = whisper.load_model("medium")
-    # load audio and pad/trim it to fit 30 seconds
-    audio = whisper.load_audio(audio_path)
-    audio = whisper.pad_or_trim(audio)
-    # make log-Mel spectrogram and move to the same device as the model
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-    # detect the spoken language
-    _, probs = model.detect_language(mel)
-    print(f"Detected language: {max(probs, key=probs.get)}")
-    # decode the audio
-    options = whisper.DecodingOptions()
-    result = whisper.decode(model, mel, options)
-    # print the recognized text
-    print(result.text)
-    return result.text
-
-
-
-
-
-def whisper_openai(audio_path):
-    audio_file= open(audio_path, "rb")
-    transcript = client.audio.transcriptions.create(
-        model="whisper-1",
-        file=audio_file
-    )
-    return transcript
-
-whisper_pipeline()
```
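Removing this module is sound: the bare `whisper_pipeline()` call at the bottom lacks the required `audio_path` argument, so merely importing the module raises `TypeError`, and `whisper_pipeline_tpu` depends on the commented-out `whisper_jax` imports. If hosted transcription is needed again, a minimal self-contained sketch (assuming `OPENAI_API_KEY` is set in the environment; `transcribe` is a hypothetical name):

```python
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment


def transcribe(audio_path: str) -> str:
    """Upload an audio file to OpenAI's hosted Whisper model and return the text."""
    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcript.text
```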
src/module/image.ipynb
DELETED
```diff
@@ -1,44 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import cv2\n",
-    "import os\n",
-    "import matplotlib.pyplot as plt \n",
-    "import numpy as np "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "catlog",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
```
src/module/image_enhance.py
DELETED
```diff
@@ -1,111 +0,0 @@
-import cv2
-import os
-from config import file_Directory
-import numpy as np
-from PIL import Image
-
-class Image_Enhance():
-
-    def __init__(self, image_path) -> None:
-        self.image_path = image_path
-
-    def brightness_Adjust(self):
-        # Load the image
-        image = cv2.imread(self.image_path)
-        #Plot the original image
-        alpha = -1.1
-        # control brightness by 50
-        beta = 70
-        image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
-        #Save the image
-        # imagepth = os.path.join(os.path.dirname(self.image_path), 'Brightness & contrast.jpg')
-        imagepth = os.path.join(file_Directory, 'Brightness & contrast.jpg')
-        cv2.imwrite(imagepth, image2)
-        return imagepth
-
-    def remove_flash(self, imagepth):
-        image = cv2.imread(imagepth)
-        # cv2.cvtColor is applied over the
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-        # Apply adaptive thresholding to segment the text
-        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4)
-
-        # Apply Gaussian blur to the grayscale image to reduce noise
-        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-
-        # Threshold the blurred image to create a binary mask for the flashlight glare
-        _, mask = cv2.threshold(blurred, 240, 255, cv2.THRESH_BINARY_INV)
-
-        # Combine the text and glare masks
-        mask = cv2.bitwise_or(mask, thresh)
-
-        # Apply morphological closing to further remove small areas of glare
-        kernel = np.ones((5,5),np.uint8)
-        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
-
-        # Apply the mask to the original image to remove flashlight glare
-        result = cv2.bitwise_and(image, image, mask=mask)
-
-        cv2.imwrite(os.path.join(file_Directory, 'remove_flash.jpg'), result)
-
-    def sharpen(self, imagepth):
-        image = cv2.imread(imagepth)
-        # Create the sharpening kernel
-        kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
-        # Sharpen the image
-        sharpened_image = cv2.filter2D(image, -1, kernel)
-        #Save the image
-        imagepath = os.path.join(file_Directory, 'sharpened_image.jpg')
-        cv2.imwrite(imagepath, sharpened_image)
-        return imagepath
-
-
-    def lapacian_sharpen(self, imagepth):
-        #Load the image
-        image = cv2.imread(imagepth)
-
-        # Sharpen the image using the Laplacian operator
-        sharpened_image2 = cv2.Laplacian(image, cv2.CV_64F)
-        imagepath = os.path.join(file_Directory, 'Laplacian_sharpened_image.jpg')
-        #Save the image
-        cv2.imwrite(imagepath, sharpened_image2)
-
-    def removing_noise(self, imagepth):
-        # Load the image
-        image = cv2.imread(imagepth)
-        # Remove noise using a median filter
-        filtered_image = cv2.medianBlur(image, 1)
-        imagepath = os.path.join(file_Directory, 'Median Blur.jpg')
-        #Save the image
-        cv2.imwrite(imagepath, filtered_image)
-
-        return imagepath
-
-
-    def enhance_color(self, imagepth):
-        # Load the image
-        image = cv2.imread(imagepth)
-        image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
-
-        # Adjust the hue, saturation, and value of the image
-        # Adjusts the hue by multiplying it by 0.7
-        image[:, :, 0] = image[:, :, 0] * 0.7
-        # Adjusts the saturation by multiplying it by 1.5
-        image[:, :, 1] = image[:, :, 1] * 1.5
-        # Adjusts the value by multiplying it by 0.5
-        image[:, :, 2] = image[:, :, 2] * 0.5
-
-        image2 = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
-        imagepath = os.path.join(file_Directory, 'enhanced coloured.jpg')
-        #Save the image
-        cv2.imwrite(imagepath, image2)
-
-
-obj = Image_Enhance(r"data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png")
-pth = obj.brightness_Adjust()
-sharpen = obj.sharpen(pth)
-lapacian_sharpen = obj.lapacian_sharpen(sharpen)
-noise = obj.removing_noise(sharpen)
-obj.enhance_color(noise)
-obj.remove_flash(sharpen)
```
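The deleted pipeline carried several latent bugs, which supports removing it: `alpha = -1.1` makes `convertScaleAbs` compute `|-1.1*x + 70|`, folding pixel values rather than adjusting brightness (and the stale comment still says 50); `cv2.medianBlur(image, 1)` is a no-op, since OpenCV copies the input unchanged for kernel sizes of 1 or less; the HSV multiplications write floats back into a uint8 array, wrapping values above 255; and `enhance_color` converts the BGR output of `cv2.imread` with `COLOR_RGB2HSV`, swapping red and blue. A corrected sketch of the brightness and denoise steps, assuming a positive gain and a real kernel size were intended:

```python
import cv2
import numpy as np


def adjust_brightness_contrast(image: np.ndarray, alpha: float = 1.1, beta: int = 40) -> np.ndarray:
    # alpha > 1 raises contrast and beta shifts brightness; a negative alpha folds values instead
    return cv2.convertScaleAbs(image, alpha=alpha, beta=beta)


def denoise(image: np.ndarray, ksize: int = 5) -> np.ndarray:
    # ksize must be odd and greater than 1; medianBlur with ksize=1 returns the input unchanged
    return cv2.medianBlur(image, ksize)
```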
src/module/llm_vision.py
DELETED
```diff
@@ -1,59 +0,0 @@
-import base64
-import requests
-from config import OPENAI_API
-import os
-
-OPENAI_API = os.getenv("OPENAI_API")
-
-"""
-openai_vision = OpenAIVision(api_key)
-image_path = "path_to_your_image.jpg"
-prompt = ""
-response = openai_vision.get_image_description(prompt,image_path)
-"""
-
-class OpenAIVision:
-    def __init__(self):
-        self.api_key = OPENAI_API
-        self.base_url = "https://api.openai.com/v1/chat/completions"
-
-    def __encode_image(self, image_path):
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
-
-    def get_image_description(self, image_path, prompt):
-        base64_image = self.__encode_image(image_path)
-
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {self.api_key}"
-        }
-
-        payload = {
-            "model": "gpt-4-vision-preview",
-            "temperature": 0.0,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": prompt,
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{base64_image}"
-                            }
-                        }
-
-                    ]
-
-                }
-            ],
-            "max_tokens": 1000,
-
-        }
-
-        response = requests.post(self.base_url, headers=headers, json=payload)
-        return response.json()
```
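Two quirks in the deleted class: the value from `from config import OPENAI_API` is immediately overwritten by `os.getenv("OPENAI_API")`, so the config module's key was never actually used, and the usage docstring passes `(prompt, image_path)` while the signature is `get_image_description(self, image_path, prompt)`, silently swapping the two strings. For reference, a call in signature order against the raw chat-completions response; the file name and prompt are placeholders, and note that OpenAI has since retired the `gpt-4-vision-preview` model the payload pins:

```python
# Hypothetical call site, with arguments in the order the signature expects
vision = OpenAIVision()
response = vision.get_image_description(
    "label.jpg",
    "List the brand, MRP, and net weight printed on this product label.",
)
print(response["choices"][0]["message"]["content"])
```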
src/module/prompts/base.py
DELETED
```diff
@@ -1,34 +0,0 @@
-from textwrap import dedent
-
-base_prompt = dedent("""
-### Instruction:
-product description starts here
-
-{text}
-
-product description ends here
-
-this is the categorys list ['BEVERAGES', 'SNACKS & BRANDED FOODS', 'NOT FOUND', 'EGGS, MEAT & FISH', 'FOODGRAINS, OIL & MASALA', 'PERSONAL CARE', 'CLEANING & HOUSEHOLD', 'FRUITS & VEGETABLES', 'BAKERY, CAKES & DAIRY', 'MAKEUP', 'BABY CARE', 'PET FOOD & ACCESSORIES', 'NON FMCG', 'ALCOHOL & TOBACCO', 'WELLNESS', 'EVERYDAY MEDICINE-NEW', 'EXCERCISE & FITNESS', 'ALCOHOLIC BEVERAGES'].
-
-Analyse data from the above product description to give me the following details in JSON format:
-( return "null" where you don't have a answer)
-
-"brand": "sample_brand",
-"mrp": "The price might start with MRP or Rs.",
-"unit": "per pack",
-"Quantity": 1, ##num of products visible
-"parent_category": "from the above given list",
-"ingredients": ["ingredient1", "ingredient2", "ingredient3"],
-"calorie_count": "Would be in numbers",
-"marketed_by": "sample_marketer",
-"manufactured_by": "sample_manufacturer",
-"manufactured_in_country": "Country XYZ",
-"type_of_packaging": "Box",
-"promotion_on_the_pack": "if any",
-"type_of_product": "give this your understanding",
-"pack_of_or_no_of_units": "No. of Units"
-
-
-Analyse data from the above product description to give me the following details in JSON format:
-Only return the output in the required json format.
-""")
```
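The template's only placeholder is `{text}`, and the body contains no other literal braces, so plain `str.format` fills it safely. A hypothetical call site, where `ocr_text` stands in for whatever the upstream extraction produced:

```python
# Hedged sketch: splice extracted product text into the deleted template
ocr_text = "Bru Instant Coffee, 100 g, MRP Rs. 240"
prompt = base_prompt.format(text=ocr_text)
```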
src/module/vectorsearch.py
DELETED
```diff
@@ -1,38 +0,0 @@
-import os
-from config import OPENAI_API_KEY, file_Directory
-from langchain_community.document_loaders.csv_loader import CSVLoader
-from langchain_openai import OpenAIEmbeddings
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-import pandas as pd
-
-os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
-
-
-# df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
-# df_new = pd.DataFrame(columns=["id", "name"])
-# df_new = df['name']
-# df_new.to_csv(r"data/data.csv", index=False)
-
-def create_vector():
-    loader = CSVLoader(file_path="data/data.csv")
-    docs = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-    documents = text_splitter.split_documents(docs)
-    db_path = os.path.join(file_Directory,"vectorstore")
-    embeddings = OpenAIEmbeddings()
-    os.makedirs(db_path, exist_ok=True)
-    Chroma.from_documents(docs, embeddings, persist_directory= db_path)
-
-def search(query):
-    embeddings = OpenAIEmbeddings()
-    db_path = os.path.join(file_Directory,"vectorstore")
-    db = Chroma(persist_directory= db_path, embedding_function= embeddings)
-    embedding_vector = OpenAIEmbeddings().embed_query(query)
-    docs = db.similarity_search_by_vector(embedding_vector)
-    print(docs[0].page_content)
-
-
-if __name__ == "__main__":
-    create_vector()
-    search("Choco Creme Wafers")
```
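Note a bug recorded in the deleted code: `create_vector` splits `docs` into `documents` but then indexes the unsplit `docs`, discarding the chunking, and `search` constructs a second `OpenAIEmbeddings()` even though one is already in scope. A corrected sketch of `create_vector`, keeping the same langchain APIs the file used:

```python
import os

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings


def create_vector(file_directory: str) -> None:
    loader = CSVLoader(file_path="data/data.csv")
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(docs)  # the deleted version embedded `docs` instead
    db_path = os.path.join(file_directory, "vectorstore")
    os.makedirs(db_path, exist_ok=True)
    Chroma.from_documents(documents, OpenAIEmbeddings(), persist_directory=db_path)
```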
src/requirements.txt
CHANGED
```diff
@@ -3,4 +3,8 @@ langchain==0.1.6
 python-decouple==3.4
 pandas
 azure-ai-formrecognizer
-easyocr
+easyocr
+langchain
+chromadb
+langchain_openai
+unstructured
```
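The hunk header shows `langchain==0.1.6` already pinned at the top of the file, so the bare `langchain` added here is redundant, and all four new packages are unpinned. A deduplicated, pinned variant might look like the following; the version numbers on the new packages are illustrative assumptions, not taken from the repo:

```text
langchain==0.1.6
python-decouple==3.4
pandas
azure-ai-formrecognizer
easyocr
chromadb==0.4.24
langchain-openai==0.0.5
unstructured==0.12.4
```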