Patrick Walukagga committed on
Commit
14a4318
·
1 Parent(s): d3abbf7

Add linting

Browse files
.flake8 ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [flake8]
2
+ ignore = D203, E402, F403, F405, W503, W605
3
+ exclude = .git,env,__pycache__,docs/source/conf.py,old,build,dist, *migrations*,env,venv,alembic
4
+ max-complexity = 10
5
+ max-line-length = 119
.isort.cfg ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [settings]
2
+ multi_line_output=3
3
+ include_trailing_comma=True
4
+ force_grid_wrap=0
5
+ use_parentheses=True
6
+ line_length=88
7
+ skip=env,migrations,alembic,venv
Makefile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: lint-apply lint-check
2
+
3
+ lint-check:
4
+ @echo "Checking for lint errors..."
5
+ flake8 .
6
+ black --check .
7
+ isort --check-only .
8
+
9
+ lint-apply:
10
+ @echo "apply linting ..."
11
+ black .
12
+ isort .
api.py CHANGED
@@ -1,13 +1,13 @@
1
- import os
2
  import logging
3
-
4
- from fastapi import FastAPI, HTTPException
5
- from gradio_client import Client
6
  from enum import Enum
7
  from typing import List, Optional
8
- from pydantic import BaseModel, Field, constr, ConfigDict
9
- from fastapi.responses import FileResponse
10
  from dotenv import load_dotenv
 
 
 
 
11
 
12
  from docs import description, tags_metadata
13
 
@@ -21,9 +21,10 @@ app = FastAPI(
21
  openapi_tags=tags_metadata,
22
  )
23
  GRADIO_URL = os.getenv("GRADIO_URL", "http://localhost:7860/")
24
- logger.info(f"GRADIO_URL: =======> {GRADIO_URL}")
25
  client = Client(GRADIO_URL)
26
 
 
27
  class StudyVariables(str, Enum):
28
  ebola_virus = "Ebola Virus"
29
  vaccine_coverage = "Vaccine coverage"
@@ -35,6 +36,7 @@ class PromptType(str, Enum):
35
  highlight = "Highlight"
36
  evidence_based = "Evidence-based"
37
 
 
38
  class StudyVariableRequest(BaseModel):
39
  study_variable: StudyVariables
40
  prompt_type: PromptType
@@ -42,6 +44,7 @@ class StudyVariableRequest(BaseModel):
42
 
43
  model_config = ConfigDict(from_attributes=True)
44
 
 
45
  class DownloadCSV(BaseModel):
46
  text: constr(min_length=1, strip_whitespace=True) # type: ignore
47
 
@@ -64,44 +67,41 @@ class ZoteroCredentials(BaseModel):
64
  @app.post("/process_zotero_library_items", tags=["zotero"])
65
  def process_zotero_library_items(zotero_credentials: ZoteroCredentials):
66
  result = client.predict(
67
- zotero_library_id=zotero_credentials.library_id,
68
- zotero_api_access_key=zotero_credentials.api_access_key,
69
- api_name="/process_zotero_library_items"
70
  )
71
- return {"result":result}
72
-
73
 
74
 
75
  @app.post("/get_study_info", tags=["zotero"])
76
  def get_study_info(study: Study):
77
- result = client.predict(
78
- study_name=study.study_name,
79
- api_name="/get_study_info"
80
- )
81
  # print(result)
82
- return {"result":result}
83
 
84
 
85
  @app.post("/study_variables", tags=["zotero"])
86
- def process_study_variables(study_request: StudyVariableRequest,):
 
 
87
  result = client.predict(
88
- text=study_request.text, # "study id, study title, study design, study summary",
89
- study_name=study_request.study_variable, # "Ebola Virus",
90
- prompt_type=study_request.prompt_type, #"Default",
91
- api_name="/process_multi_input"
92
  )
93
  print(type(result))
94
- return {"result":result[0]}
95
 
96
 
97
  @app.post("/download_csv", tags=["zotero"])
98
  def download_csv(download_request: DownloadCSV):
99
  result = client.predict(
100
- markdown_content=download_request.text,
101
- api_name="/download_as_csv"
102
  )
103
  print(result)
104
-
105
  file_path = result
106
  if not file_path or not os.path.exists(file_path):
107
  raise HTTPException(status_code=404, detail="File not found")
@@ -110,5 +110,7 @@ def download_csv(download_request: DownloadCSV):
110
  return FileResponse(
111
  file_path,
112
  media_type="text/csv", # Specify the correct MIME type for CSV
113
- filename=os.path.basename(file_path) # Provide a default filename for the download
114
- )
 
 
 
 
1
  import logging
2
+ import os
 
 
3
  from enum import Enum
4
  from typing import List, Optional
5
+
 
6
  from dotenv import load_dotenv
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.responses import FileResponse
9
+ from gradio_client import Client
10
+ from pydantic import BaseModel, ConfigDict, Field, constr
11
 
12
  from docs import description, tags_metadata
13
 
 
21
  openapi_tags=tags_metadata,
22
  )
23
  GRADIO_URL = os.getenv("GRADIO_URL", "http://localhost:7860/")
24
+ logger.info(f"GRADIO_URL: {GRADIO_URL}")
25
  client = Client(GRADIO_URL)
26
 
27
+
28
  class StudyVariables(str, Enum):
29
  ebola_virus = "Ebola Virus"
30
  vaccine_coverage = "Vaccine coverage"
 
36
  highlight = "Highlight"
37
  evidence_based = "Evidence-based"
38
 
39
+
40
  class StudyVariableRequest(BaseModel):
41
  study_variable: StudyVariables
42
  prompt_type: PromptType
 
44
 
45
  model_config = ConfigDict(from_attributes=True)
46
 
47
+
48
  class DownloadCSV(BaseModel):
49
  text: constr(min_length=1, strip_whitespace=True) # type: ignore
50
 
 
67
  @app.post("/process_zotero_library_items", tags=["zotero"])
68
  def process_zotero_library_items(zotero_credentials: ZoteroCredentials):
69
  result = client.predict(
70
+ zotero_library_id=zotero_credentials.library_id,
71
+ zotero_api_access_key=zotero_credentials.api_access_key,
72
+ api_name="/process_zotero_library_items",
73
  )
74
+ return {"result": result}
 
75
 
76
 
77
  @app.post("/get_study_info", tags=["zotero"])
78
  def get_study_info(study: Study):
79
+ result = client.predict(study_name=study.study_name, api_name="/get_study_info")
 
 
 
80
  # print(result)
81
+ return {"result": result}
82
 
83
 
84
  @app.post("/study_variables", tags=["zotero"])
85
+ def process_study_variables(
86
+ study_request: StudyVariableRequest,
87
+ ):
88
  result = client.predict(
89
+ text=study_request.text, # "study id, study title, study design, study summary",
90
+ study_name=study_request.study_variable, # "Ebola Virus",
91
+ prompt_type=study_request.prompt_type, # "Default",
92
+ api_name="/process_multi_input",
93
  )
94
  print(type(result))
95
+ return {"result": result[0]}
96
 
97
 
98
  @app.post("/download_csv", tags=["zotero"])
99
  def download_csv(download_request: DownloadCSV):
100
  result = client.predict(
101
+ markdown_content=download_request.text, api_name="/download_as_csv"
 
102
  )
103
  print(result)
104
+
105
  file_path = result
106
  if not file_path or not os.path.exists(file_path):
107
  raise HTTPException(status_code=404, detail="File not found")
 
110
  return FileResponse(
111
  file_path,
112
  media_type="text/csv", # Specify the correct MIME type for CSV
113
+ filename=os.path.basename(
114
+ file_path
115
+ ), # Provide a default filename for the download
116
+ )
app.py CHANGED
@@ -1,35 +1,38 @@
1
  # app.py
2
 
3
  import csv
4
-
5
  import datetime
6
-
7
  # from datetime import datetime
8
  import io
9
  import json
10
  import logging
11
  import os
12
- from typing import Tuple, List, Any
13
 
14
  import gradio as gr
15
  import openai
 
16
  from dotenv import load_dotenv
17
  from slugify import slugify
18
- from cachetools import LRUCache
19
 
20
- from config import STUDY_FILES, OPENAI_API_KEY
 
21
  from rag.rag_pipeline import RAGPipeline
 
 
 
 
 
 
 
22
  from utils.helpers import (
23
- append_to_study_files,
24
  add_study_files_to_chromadb,
 
25
  chromadb_client,
26
  )
27
- from utils.db import create_db_and_tables, add_study_files_to_db, get_study_file_by_name, get_study_files_by_library_id, get_all_study_files
28
- from utils.prompts import highlight_prompt, evidence_based_prompt
29
- from utils.zotero_manager import ZoteroManager
30
-
31
- from interface import create_chat_interface
32
  from utils.pdf_processor import PDFProcessor
 
 
33
 
34
  # Configure logging
35
  logging.basicConfig(level=logging.INFO)
@@ -54,11 +57,13 @@ cache = LRUCache(maxsize=100)
54
  # data_ = {}
55
  # json.dump(data_, file, indent=4)
56
 
 
57
  def get_cache_value(key):
58
  return cache.get(key)
59
 
 
60
  zotero_library_id = get_cache_value("zotero_library_id")
61
- logger.info(f"zotero_library_id: ======> {zotero_library_id}")
62
 
63
 
64
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
@@ -83,22 +88,22 @@ def get_study_info(study_name: str | list) -> str:
83
  """Retrieve information about the specified study."""
84
  if isinstance(study_name, list):
85
  study_name = study_name[0] if study_name else None
86
-
87
  if not study_name:
88
  return "No study selected"
89
-
90
  study = get_study_file_by_name(study_name)
91
- logger.info(f"Study: ======> {study}")
92
 
93
  collection = chromadb_client.get_or_create_collection("study_files_collection")
94
  result = collection.get(ids=[study_name]) # Query by study name (as a list)
95
- logger.info(f"Result: ======> {result}")
96
 
97
  if not result or len(result["metadatas"]) == 0:
98
  raise ValueError(f"Invalid study name: {study_name}")
99
 
100
  study_file = result["metadatas"][0].get("file_path")
101
- logger.info(f"study_file: =======> {study_file}")
102
  if not study_file:
103
  raise ValueError(f"File path not found for study name: {study_name}")
104
 
@@ -154,7 +159,7 @@ def chat_function(message: str, study_name: str, prompt_type: str) -> str:
154
  return "Please enter a valid query."
155
 
156
  rag = get_rag_pipeline(study_name)
157
- logger.info(f"rag: ==> {rag}")
158
  prompt = {
159
  "Highlight": highlight_prompt,
160
  "Evidence-based": evidence_based_prompt,
@@ -229,7 +234,9 @@ def process_zotero_library_items(
229
 
230
  # Dynamically update study choices
231
  global study_choices
232
- study_choices = [file.name for file in get_study_files_by_library_id([zotero_library_id])]
 
 
233
  message = "Successfully processed items in your zotero library"
234
  except Exception as e:
235
  message = f"Error process your zotero library: {str(e)}"
@@ -240,14 +247,16 @@ def process_zotero_library_items(
240
  def refresh_study_choices():
241
  """
242
  Refresh study choices for a specific dropdown instance.
243
-
244
  :return: Updated Dropdown with current study choices
245
  """
246
  global study_choices
247
  zotero_library_id = get_cache_value("zotero_library_id")
248
- logger.info(f"zotero_library_id: ====> {zotero_library_id}")
249
- study_choices = [file.name for file in get_study_files_by_library_id([zotero_library_id])]
250
- logger.info(f"Study choices: ====> {study_choices}")
 
 
251
  return study_choices
252
 
253
 
@@ -255,7 +264,7 @@ def process_multi_input(text, study_name, prompt_type):
255
  # Split input based on commas and strip any extra spaces
256
  variable_list = [word.strip().upper() for word in text.split(",")]
257
  user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
258
- logger.info(f"User message: ==> {user_message}")
259
  response = chat_function(user_message, study_name, prompt_type)
260
  return [response, gr.update(visible=True)]
261
 
@@ -400,7 +409,9 @@ def create_gr_interface() -> gr.Blocks:
400
  if zotero_library_id is None:
401
  zotero_library_id = get_cache_value("zotero_library_id")
402
  logger.info(f"zotero_library_id: =====> {zotero_library_id}")
403
- study_choices_db = get_study_files_by_library_id([zotero_library_id])
 
 
404
  logger.info(f"study_choices_db: =====> {study_choices_db}")
405
  study_files = get_all_study_files()
406
  logger.info(f"study_files: =====> {study_files}")
@@ -501,8 +512,8 @@ def create_gr_interface() -> gr.Blocks:
501
  ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
502
 
503
  refresh_button.click(
504
- fn=refresh_study_choices,
505
- outputs=[study_dropdown] # Update the same dropdown
506
  )
507
 
508
  # Event handlers for PDF Chat tab
 
1
  # app.py
2
 
3
  import csv
 
4
  import datetime
 
5
  # from datetime import datetime
6
  import io
7
  import json
8
  import logging
9
  import os
10
+ from typing import Any, List, Tuple
11
 
12
  import gradio as gr
13
  import openai
14
+ from cachetools import LRUCache
15
  from dotenv import load_dotenv
16
  from slugify import slugify
 
17
 
18
+ from config import OPENAI_API_KEY, STUDY_FILES
19
+ from interface import create_chat_interface
20
  from rag.rag_pipeline import RAGPipeline
21
+ from utils.db import (
22
+ add_study_files_to_db,
23
+ create_db_and_tables,
24
+ get_all_study_files,
25
+ get_study_file_by_name,
26
+ get_study_files_by_library_id,
27
+ )
28
  from utils.helpers import (
 
29
  add_study_files_to_chromadb,
30
+ append_to_study_files,
31
  chromadb_client,
32
  )
 
 
 
 
 
33
  from utils.pdf_processor import PDFProcessor
34
+ from utils.prompts import evidence_based_prompt, highlight_prompt
35
+ from utils.zotero_manager import ZoteroManager
36
 
37
  # Configure logging
38
  logging.basicConfig(level=logging.INFO)
 
57
  # data_ = {}
58
  # json.dump(data_, file, indent=4)
59
 
60
+
61
  def get_cache_value(key):
62
  return cache.get(key)
63
 
64
+
65
  zotero_library_id = get_cache_value("zotero_library_id")
66
+ logger.info(f"zotero_library_id: {zotero_library_id}")
67
 
68
 
69
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
 
88
  """Retrieve information about the specified study."""
89
  if isinstance(study_name, list):
90
  study_name = study_name[0] if study_name else None
91
+
92
  if not study_name:
93
  return "No study selected"
94
+
95
  study = get_study_file_by_name(study_name)
96
+ logger.info(f"Study: {study}")
97
 
98
  collection = chromadb_client.get_or_create_collection("study_files_collection")
99
  result = collection.get(ids=[study_name]) # Query by study name (as a list)
100
+ logger.info(f"Result: {result}")
101
 
102
  if not result or len(result["metadatas"]) == 0:
103
  raise ValueError(f"Invalid study name: {study_name}")
104
 
105
  study_file = result["metadatas"][0].get("file_path")
106
+ logger.info(f"study_file: {study_file}")
107
  if not study_file:
108
  raise ValueError(f"File path not found for study name: {study_name}")
109
 
 
159
  return "Please enter a valid query."
160
 
161
  rag = get_rag_pipeline(study_name)
162
+ logger.info(f"rag: {rag}")
163
  prompt = {
164
  "Highlight": highlight_prompt,
165
  "Evidence-based": evidence_based_prompt,
 
234
 
235
  # Dynamically update study choices
236
  global study_choices
237
+ study_choices = [
238
+ file.name for file in get_study_files_by_library_id([zotero_library_id])
239
+ ]
240
  message = "Successfully processed items in your zotero library"
241
  except Exception as e:
242
  message = f"Error process your zotero library: {str(e)}"
 
247
  def refresh_study_choices():
248
  """
249
  Refresh study choices for a specific dropdown instance.
250
+
251
  :return: Updated Dropdown with current study choices
252
  """
253
  global study_choices
254
  zotero_library_id = get_cache_value("zotero_library_id")
255
+ logger.info(f"zotero_library_id: {zotero_library_id}")
256
+ study_choices = [
257
+ file.name for file in get_study_files_by_library_id([zotero_library_id])
258
+ ]
259
+ logger.info(f"Study choices: {study_choices}")
260
  return study_choices
261
 
262
 
 
264
  # Split input based on commas and strip any extra spaces
265
  variable_list = [word.strip().upper() for word in text.split(",")]
266
  user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
267
+ logger.info(f"User message: {user_message}")
268
  response = chat_function(user_message, study_name, prompt_type)
269
  return [response, gr.update(visible=True)]
270
 
 
409
  if zotero_library_id is None:
410
  zotero_library_id = get_cache_value("zotero_library_id")
411
  logger.info(f"zotero_library_id: =====> {zotero_library_id}")
412
+ study_choices_db = get_study_files_by_library_id(
413
+ [zotero_library_id]
414
+ )
415
  logger.info(f"study_choices_db: =====> {study_choices_db}")
416
  study_files = get_all_study_files()
417
  logger.info(f"study_files: =====> {study_files}")
 
512
  ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
513
 
514
  refresh_button.click(
515
+ fn=refresh_study_choices,
516
+ outputs=[study_dropdown], # Update the same dropdown
517
  )
518
 
519
  # Event handlers for PDF Chat tab
docs.py CHANGED
@@ -10,4 +10,4 @@ Welcome to the Acres AI RAG API documentation.
10
 
11
  tags_metadata = [
12
  {"name": "ACRES RAG", "description": "AI RAG Application"},
13
- ]
 
10
 
11
  tags_metadata = [
12
  {"name": "ACRES RAG", "description": "AI RAG Application"},
13
+ ]
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.black]
2
+ include = '\.pyi?$'
3
+ exclude = '''
4
+ /(
5
+ \.git
6
+ | \.hg
7
+ | \.mypy_cache
8
+ | \.tox
9
+ | \.venv
10
+ | env
11
+ |venv
12
+ | _build
13
+ | buck-out
14
+ | build
15
+ | dist
16
+ | migrations
17
+ |alembic
18
+ )/
19
+ '''
rag/rag_pipeline.py CHANGED
@@ -1,19 +1,15 @@
1
  # rag/rag_pipeline.py
2
  import json
3
  import logging
4
- from typing import Dict, Any, List
 
5
 
6
- from llama_index.core import Document, VectorStoreIndex
7
- from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
8
- from llama_index.core import PromptTemplate
9
  from llama_index.embeddings.openai import OpenAIEmbedding
10
  from llama_index.llms.openai import OpenAI
11
  from llama_index.vector_stores.chroma import ChromaVectorStore
12
- import chromadb
13
- from typing import Dict, Any, List, Tuple, Optional
14
- import re
15
- import logging
16
-
17
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
@@ -172,7 +168,6 @@ class RAGPipeline:
172
  self.extract_page_number_from_query(context) if self.is_pdf else None
173
  )
174
 
175
-
176
  # This is a hack to index all the documents in the store :)
177
  n_documents = len(self.index.docstore.docs)
178
  print(f"n_documents: {n_documents}")
 
1
  # rag/rag_pipeline.py
2
  import json
3
  import logging
4
+ import re
5
+ from typing import Any, Dict, List, Optional, Tuple
6
 
7
+ import chromadb
8
+ from llama_index.core import Document, PromptTemplate, VectorStoreIndex
9
+ from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
10
  from llama_index.embeddings.openai import OpenAIEmbedding
11
  from llama_index.llms.openai import OpenAI
12
  from llama_index.vector_stores.chroma import ChromaVectorStore
 
 
 
 
 
13
 
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
 
168
  self.extract_page_number_from_query(context) if self.is_pdf else None
169
  )
170
 
 
171
  # This is a hack to index all the documents in the store :)
172
  n_documents = len(self.index.docstore.docs)
173
  print(f"n_documents: {n_documents}")
rag/rag_pipeline_backup.py CHANGED
@@ -1,9 +1,8 @@
1
  import json
2
- from typing import Dict, Any
3
- from llama_index.core import Document, VectorStoreIndex
4
- from llama_index.core.node_parser import SentenceWindowNodeParser, SentenceSplitter
5
- from llama_index.core import PromptTemplate
6
- from typing import List
7
  from llama_index.embeddings.openai import OpenAIEmbedding
8
  from llama_index.llms.openai import OpenAI
9
 
 
1
  import json
2
+ from typing import Any, Dict, List
3
+
4
+ from llama_index.core import Document, PromptTemplate, VectorStoreIndex
5
+ from llama_index.core.node_parser import SentenceSplitter, SentenceWindowNodeParser
 
6
  from llama_index.embeddings.openai import OpenAIEmbedding
7
  from llama_index.llms.openai import OpenAI
8
 
requirements-dev.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ black==24.10.0
2
+ isort==5.13.2
3
+ flake8==7.1.1
utils/db.py CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a050d39acd75098f97fd8a7032c231c5bf1865398703cd9253f61ff3a67ab294
3
- size 4954
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84acae8e51383d6990cd9edb7c1684292e523e7d0af87a71531bd5f9cf2909b5
3
+ size 4907
utils/helpers.py CHANGED
@@ -1,18 +1,18 @@
1
  # utils/helpers.py
2
 
3
- from typing import Dict, Any
 
 
 
 
4
  from llama_index.core import Response
5
- from typing import List
6
  from rag.rag_pipeline import RAGPipeline
7
  from utils.prompts import (
8
- structured_follow_up_prompt,
9
- VaccineCoverageVariables,
10
  StudyCharacteristics,
 
 
11
  )
12
- import json
13
- import json
14
- import chromadb
15
- from chromadb.api.types import Document
16
 
17
  # Initialize ChromaDB client
18
  chromadb_client = chromadb.Client()
@@ -88,7 +88,7 @@ def append_to_study_files(file_path, new_key, new_value):
88
  "Gene Xpert": "data/gene_xpert_zotero_items.json"
89
  }
90
  """
91
- try:
92
  # Read the existing data from the file
93
  with open(file_path, "r") as file:
94
  data = json.load(file)
 
1
  # utils/helpers.py
2
 
3
+ import json
4
+ from typing import Any, Dict, List
5
+
6
+ import chromadb
7
+ from chromadb.api.types import Document
8
  from llama_index.core import Response
9
+
10
  from rag.rag_pipeline import RAGPipeline
11
  from utils.prompts import (
 
 
12
  StudyCharacteristics,
13
+ VaccineCoverageVariables,
14
+ structured_follow_up_prompt,
15
  )
 
 
 
 
16
 
17
  # Initialize ChromaDB client
18
  chromadb_client = chromadb.Client()
 
88
  "Gene Xpert": "data/gene_xpert_zotero_items.json"
89
  }
90
  """
91
+ try:
92
  # Read the existing data from the file
93
  with open(file_path, "r") as file:
94
  data = json.load(file)
utils/pdf_processor.py CHANGED
@@ -3,17 +3,17 @@ PDF processing module for ACRES RAG Platform.
3
  Handles PDF file processing, text extraction, and page rendering.
4
  """
5
 
6
- # utils/pdf_processor.py
7
- import os
8
- import fitz
9
- import logging
10
- from typing import Dict, List, Optional
11
  import datetime
12
- from slugify import slugify
13
  import json
14
- from PIL import Image
 
 
15
  import re
 
16
 
 
 
 
17
 
18
  logger = logging.getLogger(__name__)
19
 
 
3
  Handles PDF file processing, text extraction, and page rendering.
4
  """
5
 
 
 
 
 
 
6
  import datetime
 
7
  import json
8
+ import logging
9
+ # utils/pdf_processor.py
10
+ import os
11
  import re
12
+ from typing import Dict, List, Optional
13
 
14
+ import fitz
15
+ from PIL import Image
16
+ from slugify import slugify
17
 
18
  logger = logging.getLogger(__name__)
19
 
utils/prompts.py CHANGED
@@ -1,9 +1,10 @@
1
  # utils/prompts.py
2
 
 
 
3
  from llama_index.core import PromptTemplate
4
- from typing import Optional, List
5
- from pydantic import BaseModel, Field
6
  from llama_index.core.prompts import PromptTemplate
 
7
 
8
 
9
  class StudyCharacteristics(BaseModel):
 
1
  # utils/prompts.py
2
 
3
+ from typing import List, Optional
4
+
5
  from llama_index.core import PromptTemplate
 
 
6
  from llama_index.core.prompts import PromptTemplate
7
+ from pydantic import BaseModel, Field
8
 
9
 
10
  class StudyCharacteristics(BaseModel):