Spaces:

Sunbird
/

acres

Running

App Files Community

Patrick Walukagga committed on Nov 22, 2024

Commit

14a4318

1 Parent(s): d3abbf7

Add linting

Browse files

Files changed (14) hide show

.flake8 +5 -0
.isort.cfg +7 -0
Makefile +12 -0
api.py +30 -28
app.py +38 -27
docs.py +1 -1
pyproject.toml +19 -0
rag/rag_pipeline.py +5 -10
rag/rag_pipeline_backup.py +4 -5
requirements-dev.txt +3 -0
utils/db.py +2 -2
utils/helpers.py +9 -9
utils/pdf_processor.py +7 -7
utils/prompts.py +3 -2

.flake8 ADDED Viewed

	@@ -0,0 +1,5 @@

+[flake8]
+    ignore = D203, E402, F403, F405, W503, W605
+    exclude = .git,env,__pycache__,docs/source/conf.py,old,build,dist, *migrations*,env,venv,alembic
+    max-complexity = 10
+    max-line-length = 119

.isort.cfg ADDED Viewed

	@@ -0,0 +1,7 @@

+[settings]
+multi_line_output=3
+include_trailing_comma=True
+force_grid_wrap=0
+use_parentheses=True
+line_length=88
+skip=env,migrations,alembic,venv

Makefile ADDED Viewed

	@@ -0,0 +1,12 @@

+.PHONY: lint-apply lint-check
+lint-check:
+	@echo "Checking for lint errors..."
+	flake8 .
+	black --check .
+	isort --check-only .
+lint-apply:
+	@echo "apply linting ..."
+	black .
+	isort .

api.py CHANGED Viewed

@@ -1,13 +1,13 @@
-import os
 import logging
-from fastapi import FastAPI, HTTPException
-from gradio_client import Client
 from enum import Enum
 from typing import List, Optional
-from pydantic import BaseModel, Field, constr, ConfigDict
-from fastapi.responses import FileResponse
 from dotenv import load_dotenv
 from docs import description, tags_metadata
@@ -21,9 +21,10 @@ app = FastAPI(
     openapi_tags=tags_metadata,
 )
 GRADIO_URL = os.getenv("GRADIO_URL", "http://localhost:7860/")
-logger.info(f"GRADIO_URL: =======> {GRADIO_URL}")
 client = Client(GRADIO_URL)
 class StudyVariables(str, Enum):
     ebola_virus = "Ebola Virus"
     vaccine_coverage = "Vaccine coverage"
@@ -35,6 +36,7 @@ class PromptType(str, Enum):
     highlight = "Highlight"
     evidence_based = "Evidence-based"
 class StudyVariableRequest(BaseModel):
     study_variable: StudyVariables
     prompt_type: PromptType
@@ -42,6 +44,7 @@ class StudyVariableRequest(BaseModel):
     model_config = ConfigDict(from_attributes=True)
 class DownloadCSV(BaseModel):
     text: constr(min_length=1, strip_whitespace=True)  # type: ignore
@@ -64,44 +67,41 @@ class ZoteroCredentials(BaseModel):
 @app.post("/process_zotero_library_items", tags=["zotero"])
 def process_zotero_library_items(zotero_credentials: ZoteroCredentials):
     result = client.predict(
-            zotero_library_id=zotero_credentials.library_id,
-            zotero_api_access_key=zotero_credentials.api_access_key,
-            api_name="/process_zotero_library_items"
     )
-    return {"result":result}
 @app.post("/get_study_info", tags=["zotero"])
 def get_study_info(study: Study):
-    result = client.predict(
-            study_name=study.study_name,
-            api_name="/get_study_info"
-    )
     # print(result)
-    return {"result":result}
 @app.post("/study_variables", tags=["zotero"])
-def process_study_variables(study_request: StudyVariableRequest,):
     result = client.predict(
-            text=study_request.text,  # "study id, study title, study design, study summary",
-            study_name=study_request.study_variable,  # "Ebola Virus",
-            prompt_type=study_request.prompt_type,  #"Default",
-            api_name="/process_multi_input"
     )
     print(type(result))
-    return {"result":result[0]}
 @app.post("/download_csv", tags=["zotero"])
 def download_csv(download_request: DownloadCSV):
     result = client.predict(
-            markdown_content=download_request.text,
-            api_name="/download_as_csv"
     )
     print(result)
     file_path = result
     if not file_path or not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="File not found")
@@ -110,5 +110,7 @@ def download_csv(download_request: DownloadCSV):
     return FileResponse(
         file_path,
         media_type="text/csv",  # Specify the correct MIME type for CSV
-        filename=os.path.basename(file_path)  # Provide a default filename for the download
-    )

 import logging
+import os
 from enum import Enum
 from typing import List, Optional
 from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import FileResponse
+from gradio_client import Client
+from pydantic import BaseModel, ConfigDict, Field, constr
 from docs import description, tags_metadata
     openapi_tags=tags_metadata,
 )
 GRADIO_URL = os.getenv("GRADIO_URL", "http://localhost:7860/")
+logger.info(f"GRADIO_URL: {GRADIO_URL}")
 client = Client(GRADIO_URL)
 class StudyVariables(str, Enum):
     ebola_virus = "Ebola Virus"
     vaccine_coverage = "Vaccine coverage"
     highlight = "Highlight"
     evidence_based = "Evidence-based"
 class StudyVariableRequest(BaseModel):
     study_variable: StudyVariables
     prompt_type: PromptType
     model_config = ConfigDict(from_attributes=True)
 class DownloadCSV(BaseModel):
     text: constr(min_length=1, strip_whitespace=True)  # type: ignore
 @app.post("/process_zotero_library_items", tags=["zotero"])
 def process_zotero_library_items(zotero_credentials: ZoteroCredentials):
     result = client.predict(
+        zotero_library_id=zotero_credentials.library_id,
+        zotero_api_access_key=zotero_credentials.api_access_key,
+        api_name="/process_zotero_library_items",
     )
+    return {"result": result}
 @app.post("/get_study_info", tags=["zotero"])
 def get_study_info(study: Study):
+    result = client.predict(study_name=study.study_name, api_name="/get_study_info")
     # print(result)
+    return {"result": result}
 @app.post("/study_variables", tags=["zotero"])
+def process_study_variables(
+    study_request: StudyVariableRequest,
+):
     result = client.predict(
+        text=study_request.text,  # "study id, study title, study design, study summary",
+        study_name=study_request.study_variable,  # "Ebola Virus",
+        prompt_type=study_request.prompt_type,  # "Default",
+        api_name="/process_multi_input",
     )
     print(type(result))
+    return {"result": result[0]}
 @app.post("/download_csv", tags=["zotero"])
 def download_csv(download_request: DownloadCSV):
     result = client.predict(
+        markdown_content=download_request.text, api_name="/download_as_csv"
     )
     print(result)
     file_path = result
     if not file_path or not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="File not found")
     return FileResponse(
         file_path,
         media_type="text/csv",  # Specify the correct MIME type for CSV
+        filename=os.path.basename(
+            file_path
+        ),  # Provide a default filename for the download
+    )

app.py CHANGED Viewed

@@ -1,35 +1,38 @@
 # app.py
 import csv
 import datetime
 # from datetime import datetime
 import io
 import json
 import logging
 import os
-from typing import Tuple, List, Any
 import gradio as gr
 import openai
 from dotenv import load_dotenv
 from slugify import slugify
-from cachetools import LRUCache
-from config import STUDY_FILES, OPENAI_API_KEY
 from rag.rag_pipeline import RAGPipeline
 from utils.helpers import (
-    append_to_study_files,
     add_study_files_to_chromadb,
     chromadb_client,
 )
-from utils.db import create_db_and_tables, add_study_files_to_db, get_study_file_by_name, get_study_files_by_library_id, get_all_study_files
-from utils.prompts import highlight_prompt, evidence_based_prompt
-from utils.zotero_manager import ZoteroManager
-from interface import create_chat_interface
 from utils.pdf_processor import PDFProcessor
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -54,11 +57,13 @@ cache = LRUCache(maxsize=100)
 #     data_ = {}
 #     json.dump(data_, file, indent=4)
 def get_cache_value(key):
     return cache.get(key)
 zotero_library_id = get_cache_value("zotero_library_id")
-logger.info(f"zotero_library_id: ======> {zotero_library_id}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
@@ -83,22 +88,22 @@ def get_study_info(study_name: str | list) -> str:
     """Retrieve information about the specified study."""
     if isinstance(study_name, list):
         study_name = study_name[0] if study_name else None
     if not study_name:
         return "No study selected"
     study = get_study_file_by_name(study_name)
-    logger.info(f"Study: ======> {study}")
     collection = chromadb_client.get_or_create_collection("study_files_collection")
     result = collection.get(ids=[study_name])  # Query by study name (as a list)
-    logger.info(f"Result: ======> {result}")
     if not result or len(result["metadatas"]) == 0:
         raise ValueError(f"Invalid study name: {study_name}")
     study_file = result["metadatas"][0].get("file_path")
-    logger.info(f"study_file: =======> {study_file}")
     if not study_file:
         raise ValueError(f"File path not found for study name: {study_name}")
@@ -154,7 +159,7 @@ def chat_function(message: str, study_name: str, prompt_type: str) -> str:
         return "Please enter a valid query."
     rag = get_rag_pipeline(study_name)
-    logger.info(f"rag: ==> {rag}")
     prompt = {
         "Highlight": highlight_prompt,
         "Evidence-based": evidence_based_prompt,
@@ -229,7 +234,9 @@ def process_zotero_library_items(
         # Dynamically update study choices
         global study_choices
-        study_choices = [file.name for file in get_study_files_by_library_id([zotero_library_id])]
         message = "Successfully processed items in your zotero library"
     except Exception as e:
         message = f"Error process your zotero library: {str(e)}"
@@ -240,14 +247,16 @@ def process_zotero_library_items(
 def refresh_study_choices():
     """
     Refresh study choices for a specific dropdown instance.
     :return: Updated Dropdown with current study choices
     """
     global study_choices
     zotero_library_id = get_cache_value("zotero_library_id")
-    logger.info(f"zotero_library_id: ====> {zotero_library_id}")
-    study_choices = [file.name for file in get_study_files_by_library_id([zotero_library_id])]
-    logger.info(f"Study choices: ====> {study_choices}")
     return study_choices
@@ -255,7 +264,7 @@ def process_multi_input(text, study_name, prompt_type):
     # Split input based on commas and strip any extra spaces
     variable_list = [word.strip().upper() for word in text.split(",")]
     user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
-    logger.info(f"User message: ==> {user_message}")
     response = chat_function(user_message, study_name, prompt_type)
     return [response, gr.update(visible=True)]
@@ -400,7 +409,9 @@ def create_gr_interface() -> gr.Blocks:
                         if zotero_library_id is None:
                             zotero_library_id = get_cache_value("zotero_library_id")
                         logger.info(f"zotero_library_id: =====> {zotero_library_id}")
-                        study_choices_db = get_study_files_by_library_id([zotero_library_id])
                         logger.info(f"study_choices_db: =====> {study_choices_db}")
                         study_files = get_all_study_files()
                         logger.info(f"study_files: =====> {study_files}")
@@ -501,8 +512,8 @@ def create_gr_interface() -> gr.Blocks:
         ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
         refresh_button.click(
-            fn=refresh_study_choices,
-            outputs=[study_dropdown]  # Update the same dropdown
         )
         # Event handlers for PDF Chat tab

 # app.py
 import csv
 import datetime
 # from datetime import datetime
 import io
 import json
 import logging
 import os
+from typing import Any, List, Tuple
 import gradio as gr
 import openai
+from cachetools import LRUCache
 from dotenv import load_dotenv
 from slugify import slugify
+from config import OPENAI_API_KEY, STUDY_FILES
+from interface import create_chat_interface
 from rag.rag_pipeline import RAGPipeline
+from utils.db import (
+    add_study_files_to_db,
+    create_db_and_tables,
+    get_all_study_files,
+    get_study_file_by_name,
+    get_study_files_by_library_id,
+)
 from utils.helpers import (
     add_study_files_to_chromadb,
+    append_to_study_files,
     chromadb_client,
 )
 from utils.pdf_processor import PDFProcessor
+from utils.prompts import evidence_based_prompt, highlight_prompt
+from utils.zotero_manager import ZoteroManager
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 #     data_ = {}
 #     json.dump(data_, file, indent=4)
 def get_cache_value(key):
     return cache.get(key)
 zotero_library_id = get_cache_value("zotero_library_id")
+logger.info(f"zotero_library_id: {zotero_library_id}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Retrieve information about the specified study."""
     if isinstance(study_name, list):
         study_name = study_name[0] if study_name else None
     if not study_name:
         return "No study selected"
     study = get_study_file_by_name(study_name)
+    logger.info(f"Study: {study}")
     collection = chromadb_client.get_or_create_collection("study_files_collection")
     result = collection.get(ids=[study_name])  # Query by study name (as a list)
+    logger.info(f"Result: {result}")
     if not result or len(result["metadatas"]) == 0:
         raise ValueError(f"Invalid study name: {study_name}")
     study_file = result["metadatas"][0].get("file_path")
+    logger.info(f"study_file: {study_file}")
     if not study_file:
         raise ValueError(f"File path not found for study name: {study_name}")
         return "Please enter a valid query."
     rag = get_rag_pipeline(study_name)
+    logger.info(f"rag: {rag}")
     prompt = {
         "Highlight": highlight_prompt,
         "Evidence-based": evidence_based_prompt,
         # Dynamically update study choices
         global study_choices
+        study_choices = [
+            file.name for file in get_study_files_by_library_id([zotero_library_id])
+        ]
         message = "Successfully processed items in your zotero library"
     except Exception as e:
         message = f"Error process your zotero library: {str(e)}"
 def refresh_study_choices():
     """
     Refresh study choices for a specific dropdown instance.
     :return: Updated Dropdown with current study choices
     """
     global study_choices
     zotero_library_id = get_cache_value("zotero_library_id")
+    logger.info(f"zotero_library_id: {zotero_library_id}")
+    study_choices = [
+        file.name for file in get_study_files_by_library_id([zotero_library_id])
+    ]
+    logger.info(f"Study choices: {study_choices}")
     return study_choices
     # Split input based on commas and strip any extra spaces
     variable_list = [word.strip().upper() for word in text.split(",")]
     user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
+    logger.info(f"User message: {user_message}")
     response = chat_function(user_message, study_name, prompt_type)
     return [response, gr.update(visible=True)]
                         if zotero_library_id is None:
                             zotero_library_id = get_cache_value("zotero_library_id")
                         logger.info(f"zotero_library_id: =====> {zotero_library_id}")
+                        study_choices_db = get_study_files_by_library_id(
+                            [zotero_library_id]
+                        )
                         logger.info(f"study_choices_db: =====> {study_choices_db}")
                         study_files = get_all_study_files()
                         logger.info(f"study_files: =====> {study_files}")
         ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
         refresh_button.click(
+            fn=refresh_study_choices,
+            outputs=[study_dropdown],  # Update the same dropdown
         )
         # Event handlers for PDF Chat tab

docs.py CHANGED Viewed

@@ -10,4 +10,4 @@ Welcome to the Acres AI RAG API documentation.
 tags_metadata = [
     {"name": "ACRES RAG", "description": "AI RAG Application"},
-]

 tags_metadata = [
     {"name": "ACRES RAG", "description": "AI RAG Application"},
+]

pyproject.toml ADDED Viewed

	@@ -0,0 +1,19 @@

+[tool.black]
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | env
+  |venv
+  | _build
+  | buck-out
+  | build
+  | dist
+  | migrations
+  |alembic
+)/
+'''

rag/rag_pipeline.py CHANGED Viewed

@@ -1,19 +1,15 @@
 # rag/rag_pipeline.py
 import json
 import logging
-from typing import Dict, Any, List
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
-from llama_index.core import PromptTemplate
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI
 from llama_index.vector_stores.chroma import ChromaVectorStore
-import chromadb
-from typing import Dict, Any, List, Tuple, Optional
-import re
-import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -172,7 +168,6 @@ class RAGPipeline:
             self.extract_page_number_from_query(context) if self.is_pdf else None
         )
         # This is a hack to index all the documents in the store :)
         n_documents = len(self.index.docstore.docs)
         print(f"n_documents: {n_documents}")

 # rag/rag_pipeline.py
 import json
 import logging
+import re
+from typing import Any, Dict, List, Optional, Tuple
+import chromadb
+from llama_index.core import Document, PromptTemplate, VectorStoreIndex
+from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI
 from llama_index.vector_stores.chroma import ChromaVectorStore
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
             self.extract_page_number_from_query(context) if self.is_pdf else None
         )
         # This is a hack to index all the documents in the store :)
         n_documents = len(self.index.docstore.docs)
         print(f"n_documents: {n_documents}")

rag/rag_pipeline_backup.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import json
-from typing import Dict, Any
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
-from llama_index.core import PromptTemplate
-from typing import List
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI

 import json
+from typing import Any, Dict, List
+from llama_index.core import Document, PromptTemplate, VectorStoreIndex
+from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI

requirements-dev.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+black==24.10.0
+isort==5.13.2
+flake8==7.1.1

utils/db.py CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a050d39acd75098f97fd8a7032c231c5bf1865398703cd9253f61ff3a67ab294
-size 4954

 version https://git-lfs.github.com/spec/v1
+oid sha256:84acae8e51383d6990cd9edb7c1684292e523e7d0af87a71531bd5f9cf2909b5
+size 4907

utils/helpers.py CHANGED Viewed

@@ -1,18 +1,18 @@
 # utils/helpers.py
-from typing import Dict, Any
 from llama_index.core import Response
-from typing import List
 from rag.rag_pipeline import RAGPipeline
 from utils.prompts import (
-    structured_follow_up_prompt,
-    VaccineCoverageVariables,
     StudyCharacteristics,
 )
-import json
-import json
-import chromadb
-from chromadb.api.types import Document
 # Initialize ChromaDB client
 chromadb_client = chromadb.Client()
@@ -88,7 +88,7 @@ def append_to_study_files(file_path, new_key, new_value):
             "Gene Xpert": "data/gene_xpert_zotero_items.json"
         }
     """
-    try:
         # Read the existing data from the file
         with open(file_path, "r") as file:
             data = json.load(file)

 # utils/helpers.py
+import json
+from typing import Any, Dict, List
+import chromadb
+from chromadb.api.types import Document
 from llama_index.core import Response
 from rag.rag_pipeline import RAGPipeline
 from utils.prompts import (
     StudyCharacteristics,
+    VaccineCoverageVariables,
+    structured_follow_up_prompt,
 )
 # Initialize ChromaDB client
 chromadb_client = chromadb.Client()
             "Gene Xpert": "data/gene_xpert_zotero_items.json"
         }
     """
+    try:
         # Read the existing data from the file
         with open(file_path, "r") as file:
             data = json.load(file)

utils/pdf_processor.py CHANGED Viewed

@@ -3,17 +3,17 @@ PDF processing module for ACRES RAG Platform.
 Handles PDF file processing, text extraction, and page rendering.
 """
-# utils/pdf_processor.py
-import os
-import fitz
-import logging
-from typing import Dict, List, Optional
 import datetime
-from slugify import slugify
 import json
-from PIL import Image
 import re
 logger = logging.getLogger(__name__)

 Handles PDF file processing, text extraction, and page rendering.
 """
 import datetime
 import json
+import logging
+# utils/pdf_processor.py
+import os
 import re
+from typing import Dict, List, Optional
+import fitz
+from PIL import Image
+from slugify import slugify
 logger = logging.getLogger(__name__)

utils/prompts.py CHANGED Viewed

@@ -1,9 +1,10 @@
 # utils/prompts.py
 from llama_index.core import PromptTemplate
-from typing import Optional, List
-from pydantic import BaseModel, Field
 from llama_index.core.prompts import PromptTemplate
 class StudyCharacteristics(BaseModel):

 # utils/prompts.py
+from typing import List, Optional
 from llama_index.core import PromptTemplate
 from llama_index.core.prompts import PromptTemplate
+from pydantic import BaseModel, Field
 class StudyCharacteristics(BaseModel):