Spaces:
Sleeping
Sleeping
Patrick Walukagga
committed on
Commit
·
17249df
1
Parent(s):
0ec9b61
Update README instructions
Browse files- README.md +37 -0
- app.py +7 -14
- utils/helpers.py +24 -0
README.md
CHANGED
@@ -60,6 +60,15 @@ gradio app.py
|
|
60 |
|
61 |
Browse the application with the link `http://localhost:7860/`
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
## Run with docker
|
65 |
To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
|
@@ -84,6 +93,15 @@ docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network
|
|
84 |
|
85 |
Browse the application with the link `http://localhost:7860/`
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
## Deploy to AWS ECS (Elastic Container Service) with Fargate
|
89 |
|
@@ -149,6 +167,25 @@ docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
|
|
149 |
docker push "${ECR_BACKEND_GRADIO_URL}:latest"
|
150 |
```
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
|
153 |
|
154 |
#### Install
|
|
|
60 |
|
61 |
Browse the application with the link `http://localhost:7860/`
|
62 |
|
63 |
+
### Run the api
|
64 |
+
Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
|
65 |
+
|
66 |
+
```sh
|
67 |
+
uvicorn api:app --reload
|
68 |
+
```
|
69 |
+
|
70 |
+
Browse the api at `http://localhost:8000/docs`
|
71 |
+
|
72 |
|
73 |
## Run with docker
|
74 |
To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
|
|
|
93 |
|
94 |
Browse the application with the link `http://localhost:7860/`
|
95 |
|
96 |
+
To run the api with docker, run the commands below. The gradio container must be running before you start the api.
|
97 |
+
|
98 |
+
```sh
|
99 |
+
docker build -f Dockerfile.api -t fastapi-app .
|
100 |
+
docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
|
101 |
+
```
|
102 |
+
|
103 |
+
Browse the api at `http://localhost:8000/docs`
|
104 |
+
|
105 |
|
106 |
## Deploy to AWS ECS (Elastic Container Service) with Fargate
|
107 |
|
|
|
167 |
docker push "${ECR_BACKEND_GRADIO_URL}:latest"
|
168 |
```
|
169 |
|
170 |
+
- Now create the fastapi repository
|
171 |
+
|
172 |
+
```sh
|
173 |
+
aws ecr create-repository \
|
174 |
+
--repository-name fastapi-api-prod \
|
175 |
+
--image-tag-mutability MUTABLE
|
176 |
+
|
177 |
+
export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
|
178 |
+
echo $ECR_BACKEND_FASTAPI_URL
|
179 |
+
```
|
180 |
+
|
181 |
+
- Build the docker image for production and push it to ECR
|
182 |
+
|
183 |
+
```sh
|
184 |
+
docker build -f Dockerfile.api.prod -t fastapi-api-prod .
|
185 |
+
docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
|
186 |
+
docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
|
187 |
+
```
|
188 |
+
|
189 |
### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
|
190 |
|
191 |
#### Install
|
app.py
CHANGED
@@ -29,11 +29,14 @@ from utils.helpers import (
|
|
29 |
add_study_files_to_chromadb,
|
30 |
append_to_study_files,
|
31 |
chromadb_client,
|
|
|
32 |
)
|
33 |
from utils.pdf_processor import PDFProcessor
|
34 |
from utils.prompts import evidence_based_prompt, highlight_prompt
|
35 |
from utils.zotero_manager import ZoteroManager
|
36 |
|
|
|
|
|
37 |
# Configure logging
|
38 |
logging.basicConfig(level=logging.INFO)
|
39 |
logger = logging.getLogger(__name__)
|
@@ -53,10 +56,6 @@ rag_cache = {}
|
|
53 |
|
54 |
cache = LRUCache(maxsize=100)
|
55 |
|
56 |
-
# with open("study_files.json", "w") as file:
|
57 |
-
# data_ = {}
|
58 |
-
# json.dump(data_, file, indent=4)
|
59 |
-
|
60 |
|
61 |
def get_cache_value(key):
|
62 |
return cache.get(key)
|
@@ -69,13 +68,13 @@ logger.info(f"zotero_library_id cache: {zotero_library_id}")
|
|
69 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
70 |
"""Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
|
71 |
if study_name not in rag_cache:
|
72 |
-
|
73 |
-
result = collection.get(ids=[study_name]) # Retrieve document by ID
|
74 |
|
75 |
-
if not
|
76 |
raise ValueError(f"Invalid study name: {study_name}")
|
77 |
|
78 |
-
study_file =
|
|
|
79 |
if not study_file:
|
80 |
raise ValueError(f"File path not found for study name: {study_name}")
|
81 |
|
@@ -95,10 +94,6 @@ def get_study_info(study_name: str | list) -> str:
|
|
95 |
study = get_study_file_by_name(study_name)
|
96 |
logger.info(f"Study: {study}")
|
97 |
|
98 |
-
# collection = chromadb_client.get_or_create_collection("study_files_collection")
|
99 |
-
# result = collection.get(ids=[study_name]) # Query by study name (as a list)
|
100 |
-
# logger.info(f"Result: {result}")
|
101 |
-
|
102 |
if not study:
|
103 |
raise ValueError(f"Invalid study name: {study_name}")
|
104 |
|
@@ -303,8 +298,6 @@ def download_as_csv(markdown_content):
|
|
303 |
|
304 |
|
305 |
# PDF Support
|
306 |
-
|
307 |
-
|
308 |
def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
|
309 |
"""Process uploaded PDF files and add them to the system."""
|
310 |
if not files or not collection_name:
|
|
|
29 |
add_study_files_to_chromadb,
|
30 |
append_to_study_files,
|
31 |
chromadb_client,
|
32 |
+
create_directory,
|
33 |
)
|
34 |
from utils.pdf_processor import PDFProcessor
|
35 |
from utils.prompts import evidence_based_prompt, highlight_prompt
|
36 |
from utils.zotero_manager import ZoteroManager
|
37 |
|
38 |
+
data_directory = "data"
|
39 |
+
create_directory(data_directory)
|
40 |
# Configure logging
|
41 |
logging.basicConfig(level=logging.INFO)
|
42 |
logger = logging.getLogger(__name__)
|
|
|
56 |
|
57 |
cache = LRUCache(maxsize=100)
|
58 |
|
|
|
|
|
|
|
|
|
59 |
|
60 |
def get_cache_value(key):
|
61 |
return cache.get(key)
|
|
|
68 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
69 |
"""Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
|
70 |
if study_name not in rag_cache:
|
71 |
+
study = get_study_file_by_name(study_name)
|
|
|
72 |
|
73 |
+
if not study:
|
74 |
raise ValueError(f"Invalid study name: {study_name}")
|
75 |
|
76 |
+
study_file = study.file_path
|
77 |
+
logger.info(f"study_file: {study_file}")
|
78 |
if not study_file:
|
79 |
raise ValueError(f"File path not found for study name: {study_name}")
|
80 |
|
|
|
94 |
study = get_study_file_by_name(study_name)
|
95 |
logger.info(f"Study: {study}")
|
96 |
|
|
|
|
|
|
|
|
|
97 |
if not study:
|
98 |
raise ValueError(f"Invalid study name: {study_name}")
|
99 |
|
|
|
298 |
|
299 |
|
300 |
# PDF Support
|
|
|
|
|
301 |
def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
|
302 |
"""Process uploaded PDF files and add them to the system."""
|
303 |
if not files or not collection_name:
|
utils/helpers.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# utils/helpers.py
|
2 |
|
3 |
import json
|
|
|
4 |
from typing import Any, Dict, List
|
5 |
|
6 |
import chromadb
|
@@ -218,6 +219,29 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
|
|
218 |
print("All study files have been successfully added to ChromaDB.")
|
219 |
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
if __name__ == "__main__":
|
222 |
# Usage example
|
223 |
add_study_files_to_chromadb("study_files.json", "study_files_collection")
|
|
|
1 |
# utils/helpers.py
|
2 |
|
3 |
import json
|
4 |
+
import os
|
5 |
from typing import Any, Dict, List
|
6 |
|
7 |
import chromadb
|
|
|
219 |
print("All study files have been successfully added to ChromaDB.")
|
220 |
|
221 |
|
222 |
+
def create_directory(directory_path):
|
223 |
+
"""
|
224 |
+
Create a directory.
|
225 |
+
Does not raise an error if the directory already exists.
|
226 |
+
|
227 |
+
Args:
|
228 |
+
directory_path (str): Path of the directory to create
|
229 |
+
|
230 |
+
Returns:
|
231 |
+
bool: True if directory was created or already exists, False if creation failed
|
232 |
+
"""
|
233 |
+
try:
|
234 |
+
# Use exist_ok=True to prevent error if directory exists
|
235 |
+
os.makedirs(directory_path, exist_ok=True)
|
236 |
+
return True
|
237 |
+
except PermissionError:
|
238 |
+
print(f"Permission denied: Cannot create directory {directory_path}")
|
239 |
+
return False
|
240 |
+
except Exception as e:
|
241 |
+
print(f"An unexpected error occurred: {e}")
|
242 |
+
return False
|
243 |
+
|
244 |
+
|
245 |
if __name__ == "__main__":
|
246 |
# Usage example
|
247 |
add_study_files_to_chromadb("study_files.json", "study_files_collection")
|