Spaces:
Running
Running
Patrick Walukagga
committed on
Commit
·
f796b9b
1
Parent(s):
40b7cbf
Refresh studies
Browse files
- .gitignore +1 -1
- app.py +58 -29
- study_files.json +0 -5
- utils/db.py +2 -2
- utils/helpers.py +3 -0
.gitignore
CHANGED
@@ -176,7 +176,7 @@ pyrightconfig.json
|
|
176 |
data/
|
177 |
study_export_*
|
178 |
study_files.db
|
179 |
-
|
180 |
|
181 |
infra/ecs_config.toml
|
182 |
aws-cli.pdf
|
|
|
176 |
data/
|
177 |
study_export_*
|
178 |
study_files.db
|
179 |
+
study_files_new.json
|
180 |
|
181 |
infra/ecs_config.toml
|
182 |
aws-cli.pdf
|
app.py
CHANGED
@@ -63,7 +63,7 @@ def get_cache_value(key):
|
|
63 |
|
64 |
|
65 |
zotero_library_id = get_cache_value("zotero_library_id")
|
66 |
-
logger.info(f"zotero_library_id: {zotero_library_id}")
|
67 |
|
68 |
|
69 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
@@ -95,14 +95,14 @@ def get_study_info(study_name: str | list) -> str:
|
|
95 |
study = get_study_file_by_name(study_name)
|
96 |
logger.info(f"Study: {study}")
|
97 |
|
98 |
-
collection = chromadb_client.get_or_create_collection("study_files_collection")
|
99 |
-
result = collection.get(ids=[study_name]) # Query by study name (as a list)
|
100 |
-
logger.info(f"Result: {result}")
|
101 |
|
102 |
-
if not
|
103 |
raise ValueError(f"Invalid study name: {study_name}")
|
104 |
|
105 |
-
study_file =
|
106 |
logger.info(f"study_file: {study_file}")
|
107 |
if not study_file:
|
108 |
raise ValueError(f"File path not found for study name: {study_name}")
|
@@ -244,22 +244,36 @@ def process_zotero_library_items(
|
|
244 |
return message
|
245 |
|
246 |
|
|
|
|
|
|
|
|
|
|
|
247 |
def refresh_study_choices():
|
248 |
"""
|
249 |
Refresh study choices for a specific dropdown instance.
|
250 |
|
251 |
:return: Updated Dropdown with current study choices
|
252 |
"""
|
253 |
-
global study_choices
|
254 |
zotero_library_id = get_cache_value("zotero_library_id")
|
255 |
-
logger.info(f"zotero_library_id: {zotero_library_id}")
|
256 |
study_choices = [
|
257 |
file.name for file in get_study_files_by_library_id([zotero_library_id])
|
258 |
]
|
259 |
-
logger.info(f"Study choices: {study_choices}")
|
260 |
return study_choices
|
261 |
|
262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
263 |
def process_multi_input(text, study_name, prompt_type):
|
264 |
# Split input based on commas and strip any extra spaces
|
265 |
variable_list = [word.strip().upper() for word in text.split(",")]
|
@@ -369,6 +383,7 @@ def chat_response(
|
|
369 |
def create_gr_interface() -> gr.Blocks:
|
370 |
"""Create and configure the Gradio interface for the RAG platform."""
|
371 |
global zotero_library_id
|
|
|
372 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
373 |
gr.Markdown("# ACRES RAG Platform")
|
374 |
|
@@ -391,46 +406,60 @@ def create_gr_interface() -> gr.Blocks:
|
|
391 |
process_zotero_btn = gr.Button("Process your Zotero Library")
|
392 |
zotero_output = gr.Markdown(label="Zotero")
|
393 |
|
394 |
-
gr.
|
395 |
-
|
396 |
-
"study_files_collection"
|
397 |
-
)
|
398 |
-
all_documents = collection.query(
|
399 |
-
query_texts=[""], n_results=1000
|
400 |
)
|
401 |
-
study_choices = [
|
402 |
-
doc_id
|
403 |
-
for doc_id in all_documents.get("ids")[0]
|
404 |
-
if all_documents
|
405 |
-
]
|
406 |
|
407 |
-
|
|
|
408 |
zotero_library_id = zotero_library_id_param.value
|
409 |
if zotero_library_id is None:
|
410 |
zotero_library_id = get_cache_value("zotero_library_id")
|
411 |
logger.info(f"zotero_library_id: =====> {zotero_library_id}")
|
412 |
-
|
413 |
-
|
414 |
-
)
|
415 |
-
logger.info(f"study_choices_db: =====> {study_choices_db}")
|
416 |
-
study_files = get_all_study_files()
|
417 |
-
logger.info(f"study_files: =====> {study_files}")
|
418 |
|
419 |
study_dropdown = gr.Dropdown(
|
420 |
choices=study_choices,
|
421 |
label="Select Study",
|
422 |
value=(study_choices[0] if study_choices else None),
|
|
|
423 |
)
|
424 |
# In Gradio interface setup
|
425 |
refresh_button = gr.Button("Refresh Studies")
|
426 |
|
427 |
study_info = gr.Markdown(label="Study Details")
|
|
|
428 |
prompt_type = gr.Radio(
|
429 |
["Default", "Highlight", "Evidence-based"],
|
430 |
label="Prompt Type",
|
431 |
value="Default",
|
432 |
)
|
433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
with gr.Column(scale=3):
|
435 |
gr.Markdown("### Study Variables")
|
436 |
with gr.Row():
|
@@ -512,8 +541,8 @@ def create_gr_interface() -> gr.Blocks:
|
|
512 |
).then(fn=cleanup_temp_files, inputs=None, outputs=None)
|
513 |
|
514 |
refresh_button.click(
|
515 |
-
fn=
|
516 |
-
outputs=[
|
517 |
)
|
518 |
|
519 |
# Event handlers for PDF Chat tab
|
|
|
63 |
|
64 |
|
65 |
zotero_library_id = get_cache_value("zotero_library_id")
|
66 |
+
logger.info(f"zotero_library_id cache: {zotero_library_id}")
|
67 |
|
68 |
|
69 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
|
|
95 |
study = get_study_file_by_name(study_name)
|
96 |
logger.info(f"Study: {study}")
|
97 |
|
98 |
+
# collection = chromadb_client.get_or_create_collection("study_files_collection")
|
99 |
+
# result = collection.get(ids=[study_name]) # Query by study name (as a list)
|
100 |
+
# logger.info(f"Result: {result}")
|
101 |
|
102 |
+
if not study:
|
103 |
raise ValueError(f"Invalid study name: {study_name}")
|
104 |
|
105 |
+
study_file = study.file_path
|
106 |
logger.info(f"study_file: {study_file}")
|
107 |
if not study_file:
|
108 |
raise ValueError(f"File path not found for study name: {study_name}")
|
|
|
244 |
return message
|
245 |
|
246 |
|
247 |
+
process_zotero_library_items(
|
248 |
+
os.getenv("ZOTERO_LIBRARY_ID"), os.getenv("ZOTERO_API_ACCESS_KEY")
|
249 |
+
)
|
250 |
+
|
251 |
+
|
252 |
def refresh_study_choices():
|
253 |
"""
|
254 |
Refresh study choices for a specific dropdown instance.
|
255 |
|
256 |
:return: Updated Dropdown with current study choices
|
257 |
"""
|
258 |
+
global study_choices, zotero_library_id
|
259 |
zotero_library_id = get_cache_value("zotero_library_id")
|
260 |
+
logger.info(f"zotero_library_id refreshed: {zotero_library_id}")
|
261 |
study_choices = [
|
262 |
file.name for file in get_study_files_by_library_id([zotero_library_id])
|
263 |
]
|
264 |
+
logger.info(f"Study choices refreshed: {study_choices}")
|
265 |
return study_choices
|
266 |
|
267 |
|
268 |
+
def new_study_choices():
|
269 |
+
"""
|
270 |
+
Refresh study choices for a specific dropdown instance.
|
271 |
+
"""
|
272 |
+
study_choices = refresh_study_choices()
|
273 |
+
study_choices = ", ".join(study_choices)
|
274 |
+
return f"**Your studies are: {study_choices}**"
|
275 |
+
|
276 |
+
|
277 |
def process_multi_input(text, study_name, prompt_type):
|
278 |
# Split input based on commas and strip any extra spaces
|
279 |
variable_list = [word.strip().upper() for word in text.split(",")]
|
|
|
383 |
def create_gr_interface() -> gr.Blocks:
|
384 |
"""Create and configure the Gradio interface for the RAG platform."""
|
385 |
global zotero_library_id
|
386 |
+
|
387 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
388 |
gr.Markdown("# ACRES RAG Platform")
|
389 |
|
|
|
406 |
process_zotero_btn = gr.Button("Process your Zotero Library")
|
407 |
zotero_output = gr.Markdown(label="Zotero")
|
408 |
|
409 |
+
local_storage_state = gr.BrowserState(
|
410 |
+
{"zotero_library_id": "", "study_choices": []}
|
|
|
|
|
|
|
|
|
411 |
)
|
|
|
|
|
|
|
|
|
|
|
412 |
|
413 |
+
gr.Markdown("### Study Information")
|
414 |
+
|
415 |
zotero_library_id = zotero_library_id_param.value
|
416 |
if zotero_library_id is None:
|
417 |
zotero_library_id = get_cache_value("zotero_library_id")
|
418 |
logger.info(f"zotero_library_id: =====> {zotero_library_id}")
|
419 |
+
study_choices = refresh_study_choices()
|
420 |
+
logger.info(f"study_choices_db: =====> {study_choices}")
|
|
|
|
|
|
|
|
|
421 |
|
422 |
study_dropdown = gr.Dropdown(
|
423 |
choices=study_choices,
|
424 |
label="Select Study",
|
425 |
value=(study_choices[0] if study_choices else None),
|
426 |
+
allow_custom_value=True,
|
427 |
)
|
428 |
# In Gradio interface setup
|
429 |
refresh_button = gr.Button("Refresh Studies")
|
430 |
|
431 |
study_info = gr.Markdown(label="Study Details")
|
432 |
+
new_studies = gr.Markdown(label="Your Studies")
|
433 |
prompt_type = gr.Radio(
|
434 |
["Default", "Highlight", "Evidence-based"],
|
435 |
label="Prompt Type",
|
436 |
value="Default",
|
437 |
)
|
438 |
|
439 |
+
@demo.load(
|
440 |
+
inputs=[local_storage_state],
|
441 |
+
outputs=[zotero_library_id_param],
|
442 |
+
)
|
443 |
+
def load_from_local_storage(saved_values):
|
444 |
+
print("loading from local storage", saved_values)
|
445 |
+
return saved_values.get("zotero_library_id")
|
446 |
+
|
447 |
+
@gr.on(
|
448 |
+
[
|
449 |
+
zotero_library_id_param.change,
|
450 |
+
process_zotero_btn.click,
|
451 |
+
refresh_button.click,
|
452 |
+
],
|
453 |
+
inputs=[zotero_library_id_param],
|
454 |
+
outputs=[local_storage_state],
|
455 |
+
)
|
456 |
+
def save_to_local_storage(zotero_library_id_param):
|
457 |
+
study_choices = refresh_study_choices()
|
458 |
+
return {
|
459 |
+
"zotero_library_id": zotero_library_id_param,
|
460 |
+
"study_choices": study_choices,
|
461 |
+
}
|
462 |
+
|
463 |
with gr.Column(scale=3):
|
464 |
gr.Markdown("### Study Variables")
|
465 |
with gr.Row():
|
|
|
541 |
).then(fn=cleanup_temp_files, inputs=None, outputs=None)
|
542 |
|
543 |
refresh_button.click(
|
544 |
+
fn=new_study_choices,
|
545 |
+
outputs=[new_studies], # Update the same dropdown
|
546 |
)
|
547 |
|
548 |
# Event handlers for PDF Chat tab
|
study_files.json
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
|
3 |
-
"Ebola Virus": "data/ebola_virus_zotero_items.json",
|
4 |
-
"GeneXpert": "data/gene_xpert_zotero_items.json"
|
5 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
utils/db.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fc6c599c827559f1eb0b001f4a132109b004ae3d12851ac2e2327492a323e44
|
3 |
+
size 4968
|
utils/helpers.py
CHANGED
@@ -195,6 +195,9 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
|
|
195 |
print(f"File '{file_path}' not found.")
|
196 |
return
|
197 |
|
|
|
|
|
|
|
198 |
# Get or create the collection in ChromaDB
|
199 |
collection = chromadb_client.get_or_create_collection(collection_name)
|
200 |
|
|
|
195 |
print(f"File '{file_path}' not found.")
|
196 |
return
|
197 |
|
198 |
+
if not study_files_data:
|
199 |
+
return
|
200 |
+
|
201 |
# Get or create the collection in ChromaDB
|
202 |
collection = chromadb_client.get_or_create_collection(collection_name)
|
203 |
|