Spaces:

Sunbird
/

acres

Sleeping

App Files Files Community

Patrick Walukagga committed on Nov 27, 2024

Commit

17249df

1 Parent(s): 0ec9b61

Update README instructions

Browse files

Files changed (3) hide show

README.md +37 -0
app.py +7 -14
utils/helpers.py +24 -0

README.md CHANGED Viewed

@@ -60,6 +60,15 @@ gradio app.py
 Browse the application with the link `http://localhost:7860/`
 ## Run with docker
 To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
@@ -84,6 +93,15 @@ docker run -it -p 7860:7860 --rm --name gradio --network=gradio-fastapi-network
 Browse the application with the link `http://localhost:7860/`
 ## Deploy to AWS ECS (Elastic Container Service) with Fargate
@@ -149,6 +167,25 @@ docker tag gradio-app-prod:latest "${ECR_BACKEND_GRADIO_URL}:latest"
 docker push "${ECR_BACKEND_GRADIO_URL}:latest"
 ```
 ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
 #### Install

 Browse the application with the link `http://localhost:7860/`
+### Run the api
+Make sure the gradio app is running on port `7860` and then run the command below in another terminal tab in the same directory.
+```sh
+uvicorn api:app --reload
+```
+Browse the api at `http://localhost:8000/docs`
 ## Run with docker
 To run the application with docker locally, first make sure you have docker installed. See [link](https://docs.docker.com/)
 Browse the application with the link `http://localhost:7860/`
+To run the API with Docker, run the commands below. The gradio container must be started before the API container.
+```sh
+docker build -f Dockerfile.api -t fastapi-app .
+docker run -it -p 8000:8000 --rm --name fastapi --network=gradio-fastapi-network fastapi-app
+```
+Browse the api at `http://localhost:8000/docs`
 ## Deploy to AWS ECS (Elastic Container Service) with Fargate
 docker push "${ECR_BACKEND_GRADIO_URL}:latest"
 ```
+- Now create the FastAPI repository
+```sh
+aws ecr create-repository \
+  --repository-name fastapi-api-prod \
+  --image-tag-mutability MUTABLE
+export ECR_BACKEND_FASTAPI_URL="$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/fastapi-api-prod"
+echo $ECR_BACKEND_FASTAPI_URL
+```
+- Build the Docker image for production and push it to ECR
+```sh
+docker build -f Dockerfile.api.prod -t fastapi-api-prod .
+docker tag fastapi-api-prod:latest "${ECR_BACKEND_FASTAPI_URL}:latest"
+docker push "${ECR_BACKEND_FASTAPI_URL}:latest"
+```
 ### Setup and Provision AWS ECS infra using AWS Cloudformation (IaC)
 #### Install

app.py CHANGED Viewed

@@ -29,11 +29,14 @@ from utils.helpers import (
     add_study_files_to_chromadb,
     append_to_study_files,
     chromadb_client,
 )
 from utils.pdf_processor import PDFProcessor
 from utils.prompts import evidence_based_prompt, highlight_prompt
 from utils.zotero_manager import ZoteroManager
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -53,10 +56,6 @@ rag_cache = {}
 cache = LRUCache(maxsize=100)
-# with open("study_files.json", "w") as file:
-#     data_ = {}
-#     json.dump(data_, file, indent=4)
 def get_cache_value(key):
     return cache.get(key)
@@ -69,13 +68,13 @@ logger.info(f"zotero_library_id cache: {zotero_library_id}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
     if study_name not in rag_cache:
-        collection = chromadb_client.get_or_create_collection("study_files_collection")
-        result = collection.get(ids=[study_name])  # Retrieve document by ID
-        if not result or len(result["metadatas"]) == 0:
             raise ValueError(f"Invalid study name: {study_name}")
-        study_file = result["metadatas"][0].get("file_path")
         if not study_file:
             raise ValueError(f"File path not found for study name: {study_name}")
@@ -95,10 +94,6 @@ def get_study_info(study_name: str | list) -> str:
     study = get_study_file_by_name(study_name)
     logger.info(f"Study: {study}")
-    # collection = chromadb_client.get_or_create_collection("study_files_collection")
-    # result = collection.get(ids=[study_name])  # Query by study name (as a list)
-    # logger.info(f"Result: {result}")
     if not study:
         raise ValueError(f"Invalid study name: {study_name}")
@@ -303,8 +298,6 @@ def download_as_csv(markdown_content):
 # PDF Support
 def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
     """Process uploaded PDF files and add them to the system."""
     if not files or not collection_name:

     add_study_files_to_chromadb,
     append_to_study_files,
     chromadb_client,
+    create_directory,
 )
 from utils.pdf_processor import PDFProcessor
 from utils.prompts import evidence_based_prompt, highlight_prompt
 from utils.zotero_manager import ZoteroManager
+data_directory = "data"
+create_directory(data_directory)
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 cache = LRUCache(maxsize=100)
 def get_cache_value(key):
     return cache.get(key)
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
     if study_name not in rag_cache:
+        study = get_study_file_by_name(study_name)
+        if not study:
             raise ValueError(f"Invalid study name: {study_name}")
+        study_file = study.file_path
+        logger.info(f"study_file: {study_file}")
         if not study_file:
             raise ValueError(f"File path not found for study name: {study_name}")
     study = get_study_file_by_name(study_name)
     logger.info(f"Study: {study}")
     if not study:
         raise ValueError(f"Invalid study name: {study_name}")
 # PDF Support
 def process_pdf_uploads(files: List[gr.File], collection_name: str) -> str:
     """Process uploaded PDF files and add them to the system."""
     if not files or not collection_name:

utils/helpers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # utils/helpers.py
 import json
 from typing import Any, Dict, List
 import chromadb
@@ -218,6 +219,29 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
     print("All study files have been successfully added to ChromaDB.")
 if __name__ == "__main__":
     # Usage example
     add_study_files_to_chromadb("study_files.json", "study_files_collection")

 # utils/helpers.py
 import json
+import os
 from typing import Any, Dict, List
 import chromadb
     print("All study files have been successfully added to ChromaDB.")
+def create_directory(directory_path):
+    """
+    Create a directory.
+    Does not raise an error if the directory already exists.
+    Args:
+        directory_path (str): Path of the directory to create
+    Returns:
+        bool: True if directory was created or already exists, False if creation failed
+    """
+    try:
+        # Use exist_ok=True to prevent error if directory exists
+        os.makedirs(directory_path, exist_ok=True)
+        return True
+    except PermissionError:
+        print(f"Permission denied: Cannot create directory {directory_path}")
+        return False
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return False
 if __name__ == "__main__":
     # Usage example
     add_study_files_to_chromadb("study_files.json", "study_files_collection")