Patrick Walukagga committed on
Commit
17249df
·
1 Parent(s): 0ec9b61

Update README instructions

Browse files
Files changed (3) hide show
  1. README.md +37 -0
  2. app.py +7 -14
  3. utils/helpers.py +24 -0
README.md CHANGED
@@ -60,6 +60,15 @@ gradio app.py
60
 
61
  Browse the application with the link `http://localhost:7860/`
62
 
 
 
 
 
 
 
 
 
 
63
 
64
  ## Run with docker
65
  To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
@@ -84,6 +93,15 @@ docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network
84
 
85
  Browse the application with the link `http://localhost:7860/`
86
 
 
 
 
 
 
 
 
 
 
87
 
88
  ## Deploy to AWS ECS (Elastic Container Service) with Fargate
89
 
@@ -149,6 +167,25 @@ docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
149
  docker push "${ECR_BACKEND_GRADIO_URL}:latest"
150
  ```
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
153
 
154
  #### Install
 
60
 
61
  Browse the application with the link `http://localhost:7860/`
62
 
63
+ ### Run the api
64
+ Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
65
+
66
+ ```sh
67
+ uvicorn api:app --reload
68
+ ```
69
+
70
+ Browse the api at `http://localhost:8000/docs`
71
+
72
 
73
  ## Run with docker
74
  To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
 
93
 
94
  Browse the application with the link `http://localhost:7860/`
95
 
96
+ To run the api with docker, run the commands below. The gradio container must be running before you start the api.
97
+
98
+ ```sh
99
+ docker build -f Dockerfile.api -t fastapi-app .
100
+ docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
101
+ ```
102
+
103
+ Browse the api at `http://localhost:8000/docs`
104
+
105
 
106
  ## Deploy to AWS ECS (Elastic Container Service) with Fargate
107
 
 
167
  docker push "${ECR_BACKEND_GRADIO_URL}:latest"
168
  ```
169
 
170
+ - Now create the fastapi repository
171
+
172
+ ```sh
173
+ aws ecr create-repository \
174
+ --repository-name fastapi-api-prod \
175
+ --image-tag-mutability MUTABLE
176
+
177
+ export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
178
+ echo $ECR_BACKEND_FASTAPI_URL
179
+ ```
180
+
181
+ - Build the docker image for production and push it to ECR
182
+
183
+ ```sh
184
+ docker build -f Dockerfile.api.prod -t fastapi-api-prod .
185
+ docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
186
+ docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
187
+ ```
188
+
189
  ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
190
 
191
  #### Install
app.py CHANGED
@@ -29,11 +29,14 @@ from utils.helpers import (
29
  add_study_files_to_chromadb,
30
  append_to_study_files,
31
  chromadb_client,
 
32
  )
33
  from utils.pdf_processor import PDFProcessor
34
  from utils.prompts import evidence_based_prompt, highlight_prompt
35
  from utils.zotero_manager import ZoteroManager
36
 
 
 
37
  # Configure logging
38
  logging.basicConfig(level=logging.INFO)
39
  logger = logging.getLogger(__name__)
@@ -53,10 +56,6 @@ rag_cache = {}
53
 
54
  cache = LRUCache(maxsize=100)
55
 
56
- # with open("study_files.json", "w") as file:
57
- # data_ = {}
58
- # json.dump(data_, file, indent=4)
59
-
60
 
61
  def get_cache_value(key):
62
  return cache.get(key)
@@ -69,13 +68,13 @@ logger.info(f"zotero_library_id cache: {zotero_library_id}")
69
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
70
  """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
71
  if study_name not in rag_cache:
72
- collection = chromadb_client.get_or_create_collection("study_files_collection")
73
- result = collection.get(ids=[study_name]) # Retrieve document by ID
74
 
75
- if not result or len(result["metadatas"]) == 0:
76
  raise ValueError(f"Invalid study name: {study_name}")
77
 
78
- study_file = result["metadatas"][0].get("file_path")
 
79
  if not study_file:
80
  raise ValueError(f"File path not found for study name: {study_name}")
81
 
@@ -95,10 +94,6 @@ def get_study_info(study_name: str | list) -> str:
95
  study = get_study_file_by_name(study_name)
96
  logger.info(f"Study: {study}")
97
 
98
- # collection = chromadb_client.get_or_create_collection("study_files_collection")
99
- # result = collection.get(ids=[study_name]) # Query by study name (as a list)
100
- # logger.info(f"Result: {result}")
101
-
102
  if not study:
103
  raise ValueError(f"Invalid study name: {study_name}")
104
 
@@ -303,8 +298,6 @@ def download_as_csv(markdown_content):
303
 
304
 
305
  # PDF Support
306
-
307
-
308
  def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
309
  """Process uploaded PDF files and add them to the system."""
310
  if not files or not collection_name:
 
29
  add_study_files_to_chromadb,
30
  append_to_study_files,
31
  chromadb_client,
32
+ create_directory,
33
  )
34
  from utils.pdf_processor import PDFProcessor
35
  from utils.prompts import evidence_based_prompt, highlight_prompt
36
  from utils.zotero_manager import ZoteroManager
37
 
38
+ data_directory = "data"
39
+ create_directory(data_directory)
40
  # Configure logging
41
  logging.basicConfig(level=logging.INFO)
42
  logger = logging.getLogger(__name__)
 
56
 
57
  cache = LRUCache(maxsize=100)
58
 
 
 
 
 
59
 
60
  def get_cache_value(key):
61
  return cache.get(key)
 
68
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
69
  """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
70
  if study_name not in rag_cache:
71
+ study = get_study_file_by_name(study_name)
 
72
 
73
+ if not study:
74
  raise ValueError(f"Invalid study name: {study_name}")
75
 
76
+ study_file = study.file_path
77
+ logger.info(f"study_file: {study_file}")
78
  if not study_file:
79
  raise ValueError(f"File path not found for study name: {study_name}")
80
 
 
94
  study = get_study_file_by_name(study_name)
95
  logger.info(f"Study: {study}")
96
 
 
 
 
 
97
  if not study:
98
  raise ValueError(f"Invalid study name: {study_name}")
99
 
 
298
 
299
 
300
  # PDF Support
 
 
301
  def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
302
  """Process uploaded PDF files and add them to the system."""
303
  if not files or not collection_name:
utils/helpers.py CHANGED
@@ -1,6 +1,7 @@
1
  # utils/helpers.py
2
 
3
  import json
 
4
  from typing import Any, Dict, List
5
 
6
  import chromadb
@@ -218,6 +219,29 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
218
  print("All study files have been successfully added to ChromaDB.")
219
 
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  if __name__ == "__main__":
222
  # Usage example
223
  add_study_files_to_chromadb("study_files.json", "study_files_collection")
 
1
  # utils/helpers.py
2
 
3
  import json
4
+ import os
5
  from typing import Any, Dict, List
6
 
7
  import chromadb
 
219
  print("All study files have been successfully added to ChromaDB.")
220
 
221
 
222
+ def create_directory(directory_path):
223
+ """
224
+ Create a directory.
225
+ Does not raise an error if the directory already exists.
226
+
227
+ Args:
228
+ directory_path (str): Path of the directory to create
229
+
230
+ Returns:
231
+ bool: True if directory was created or already exists, False if creation failed
232
+ """
233
+ try:
234
+ # Use exist_ok=True to prevent error if directory exists
235
+ os.makedirs(directory_path, exist_ok=True)
236
+ return True
237
+ except PermissionError:
238
+ print(f"Permission denied: Cannot create directory {directory_path}")
239
+ return False
240
+ except Exception as e:
241
+ print(f"An unexpected error occurred: {e}")
242
+ return False
243
+
244
+
245
  if __name__ == "__main__":
246
  # Usage example
247
  add_study_files_to_chromadb("study_files.json", "study_files_collection")