Patrick Walukagga committed on
Commit
f796b9b
·
1 Parent(s): 40b7cbf

Refresh studies

Browse files
Files changed (5) hide show
  1. .gitignore +1 -1
  2. app.py +58 -29
  3. study_files.json +0 -5
  4. utils/db.py +2 -2
  5. utils/helpers.py +3 -0
.gitignore CHANGED
@@ -176,7 +176,7 @@ pyrightconfig.json
176
  data/
177
  study_export_*
178
  study_files.db
179
- study_files.json
180
 
181
  infra/ecs_config.toml
182
  aws-cli.pdf
 
176
  data/
177
  study_export_*
178
  study_files.db
179
+ study_files_new.json
180
 
181
  infra/ecs_config.toml
182
  aws-cli.pdf
app.py CHANGED
@@ -63,7 +63,7 @@ def get_cache_value(key):
63
 
64
 
65
  zotero_library_id = get_cache_value("zotero_library_id")
66
- logger.info(f"zotero_library_id: {zotero_library_id}")
67
 
68
 
69
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
@@ -95,14 +95,14 @@ def get_study_info(study_name: str | list) -> str:
95
  study = get_study_file_by_name(study_name)
96
  logger.info(f"Study: {study}")
97
 
98
- collection = chromadb_client.get_or_create_collection("study_files_collection")
99
- result = collection.get(ids=[study_name]) # Query by study name (as a list)
100
- logger.info(f"Result: {result}")
101
 
102
- if not result or len(result["metadatas"]) == 0:
103
  raise ValueError(f"Invalid study name: {study_name}")
104
 
105
- study_file = result["metadatas"][0].get("file_path")
106
  logger.info(f"study_file: {study_file}")
107
  if not study_file:
108
  raise ValueError(f"File path not found for study name: {study_name}")
@@ -244,22 +244,36 @@ def process_zotero_library_items(
244
  return message
245
 
246
 
 
 
 
 
 
247
  def refresh_study_choices():
248
  """
249
  Refresh study choices for a specific dropdown instance.
250
 
251
  :return: Updated Dropdown with current study choices
252
  """
253
- global study_choices
254
  zotero_library_id = get_cache_value("zotero_library_id")
255
- logger.info(f"zotero_library_id: {zotero_library_id}")
256
  study_choices = [
257
  file.name for file in get_study_files_by_library_id([zotero_library_id])
258
  ]
259
- logger.info(f"Study choices: {study_choices}")
260
  return study_choices
261
 
262
 
 
 
 
 
 
 
 
 
 
263
  def process_multi_input(text, study_name, prompt_type):
264
  # Split input based on commas and strip any extra spaces
265
  variable_list = [word.strip().upper() for word in text.split(",")]
@@ -369,6 +383,7 @@ def chat_response(
369
  def create_gr_interface() -> gr.Blocks:
370
  """Create and configure the Gradio interface for the RAG platform."""
371
  global zotero_library_id
 
372
  with gr.Blocks(theme=gr.themes.Base()) as demo:
373
  gr.Markdown("# ACRES RAG Platform")
374
 
@@ -391,46 +406,60 @@ def create_gr_interface() -> gr.Blocks:
391
  process_zotero_btn = gr.Button("Process your Zotero Library")
392
  zotero_output = gr.Markdown(label="Zotero")
393
 
394
- gr.Markdown("### Study Information")
395
- collection = chromadb_client.get_or_create_collection(
396
- "study_files_collection"
397
- )
398
- all_documents = collection.query(
399
- query_texts=[""], n_results=1000
400
  )
401
- study_choices = [
402
- doc_id
403
- for doc_id in all_documents.get("ids")[0]
404
- if all_documents
405
- ]
406
 
407
- print(f"zotero_library_id: {zotero_library_id_param.value}")
 
408
  zotero_library_id = zotero_library_id_param.value
409
  if zotero_library_id is None:
410
  zotero_library_id = get_cache_value("zotero_library_id")
411
  logger.info(f"zotero_library_id: =====> {zotero_library_id}")
412
- study_choices_db = get_study_files_by_library_id(
413
- [zotero_library_id]
414
- )
415
- logger.info(f"study_choices_db: =====> {study_choices_db}")
416
- study_files = get_all_study_files()
417
- logger.info(f"study_files: =====> {study_files}")
418
 
419
  study_dropdown = gr.Dropdown(
420
  choices=study_choices,
421
  label="Select Study",
422
  value=(study_choices[0] if study_choices else None),
 
423
  )
424
  # In Gradio interface setup
425
  refresh_button = gr.Button("Refresh Studies")
426
 
427
  study_info = gr.Markdown(label="Study Details")
 
428
  prompt_type = gr.Radio(
429
  ["Default", "Highlight", "Evidence-based"],
430
  label="Prompt Type",
431
  value="Default",
432
  )
433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
  with gr.Column(scale=3):
435
  gr.Markdown("### Study Variables")
436
  with gr.Row():
@@ -512,8 +541,8 @@ def create_gr_interface() -> gr.Blocks:
512
  ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
513
 
514
  refresh_button.click(
515
- fn=refresh_study_choices,
516
- outputs=[study_dropdown], # Update the same dropdown
517
  )
518
 
519
  # Event handlers for PDF Chat tab
 
63
 
64
 
65
  zotero_library_id = get_cache_value("zotero_library_id")
66
+ logger.info(f"zotero_library_id cache: {zotero_library_id}")
67
 
68
 
69
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
 
95
  study = get_study_file_by_name(study_name)
96
  logger.info(f"Study: {study}")
97
 
98
+ # collection = chromadb_client.get_or_create_collection("study_files_collection")
99
+ # result = collection.get(ids=[study_name]) # Query by study name (as a list)
100
+ # logger.info(f"Result: {result}")
101
 
102
+ if not study:
103
  raise ValueError(f"Invalid study name: {study_name}")
104
 
105
+ study_file = study.file_path
106
  logger.info(f"study_file: {study_file}")
107
  if not study_file:
108
  raise ValueError(f"File path not found for study name: {study_name}")
 
244
  return message
245
 
246
 
247
+ process_zotero_library_items(
248
+ os.getenv("ZOTERO_LIBRARY_ID"), os.getenv("ZOTERO_API_ACCESS_KEY")
249
+ )
250
+
251
+
252
  def refresh_study_choices():
253
  """
254
  Refresh study choices for a specific dropdown instance.
255
 
256
  :return: Updated Dropdown with current study choices
257
  """
258
+ global study_choices, zotero_library_id
259
  zotero_library_id = get_cache_value("zotero_library_id")
260
+ logger.info(f"zotero_library_id refreshed: {zotero_library_id}")
261
  study_choices = [
262
  file.name for file in get_study_files_by_library_id([zotero_library_id])
263
  ]
264
+ logger.info(f"Study choices refreshed: {study_choices}")
265
  return study_choices
266
 
267
 
268
+ def new_study_choices():
269
+ """
270
+ Refresh study choices for a specific dropdown instance.
271
+ """
272
+ study_choices = refresh_study_choices()
273
+ study_choices = ", ".join(study_choices)
274
+ return f"**Your studies are: {study_choices}**"
275
+
276
+
277
  def process_multi_input(text, study_name, prompt_type):
278
  # Split input based on commas and strip any extra spaces
279
  variable_list = [word.strip().upper() for word in text.split(",")]
 
383
  def create_gr_interface() -> gr.Blocks:
384
  """Create and configure the Gradio interface for the RAG platform."""
385
  global zotero_library_id
386
+
387
  with gr.Blocks(theme=gr.themes.Base()) as demo:
388
  gr.Markdown("# ACRES RAG Platform")
389
 
 
406
  process_zotero_btn = gr.Button("Process your Zotero Library")
407
  zotero_output = gr.Markdown(label="Zotero")
408
 
409
+ local_storage_state = gr.BrowserState(
410
+ {"zotero_library_id": "", "study_choices": []}
 
 
 
 
411
  )
 
 
 
 
 
412
 
413
+ gr.Markdown("### Study Information")
414
+
415
  zotero_library_id = zotero_library_id_param.value
416
  if zotero_library_id is None:
417
  zotero_library_id = get_cache_value("zotero_library_id")
418
  logger.info(f"zotero_library_id: =====> {zotero_library_id}")
419
+ study_choices = refresh_study_choices()
420
+ logger.info(f"study_choices_db: =====> {study_choices}")
 
 
 
 
421
 
422
  study_dropdown = gr.Dropdown(
423
  choices=study_choices,
424
  label="Select Study",
425
  value=(study_choices[0] if study_choices else None),
426
+ allow_custom_value=True,
427
  )
428
  # In Gradio interface setup
429
  refresh_button = gr.Button("Refresh Studies")
430
 
431
  study_info = gr.Markdown(label="Study Details")
432
+ new_studies = gr.Markdown(label="Your Studies")
433
  prompt_type = gr.Radio(
434
  ["Default", "Highlight", "Evidence-based"],
435
  label="Prompt Type",
436
  value="Default",
437
  )
438
 
439
+ @demo.load(
440
+ inputs=[local_storage_state],
441
+ outputs=[zotero_library_id_param],
442
+ )
443
+ def load_from_local_storage(saved_values):
444
+ print("loading from local storage", saved_values)
445
+ return saved_values.get("zotero_library_id")
446
+
447
+ @gr.on(
448
+ [
449
+ zotero_library_id_param.change,
450
+ process_zotero_btn.click,
451
+ refresh_button.click,
452
+ ],
453
+ inputs=[zotero_library_id_param],
454
+ outputs=[local_storage_state],
455
+ )
456
+ def save_to_local_storage(zotero_library_id_param):
457
+ study_choices = refresh_study_choices()
458
+ return {
459
+ "zotero_library_id": zotero_library_id_param,
460
+ "study_choices": study_choices,
461
+ }
462
+
463
  with gr.Column(scale=3):
464
  gr.Markdown("### Study Variables")
465
  with gr.Row():
 
541
  ).then(fn=cleanup_temp_files, inputs=None, outputs=None)
542
 
543
  refresh_button.click(
544
+ fn=new_study_choices,
545
+ outputs=[new_studies], # Update the same dropdown
546
  )
547
 
548
  # Event handlers for PDF Chat tab
study_files.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
3
- "Ebola Virus": "data/ebola_virus_zotero_items.json",
4
- "GeneXpert": "data/gene_xpert_zotero_items.json"
5
- }
 
 
 
 
 
 
utils/db.py CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84acae8e51383d6990cd9edb7c1684292e523e7d0af87a71531bd5f9cf2909b5
3
- size 4907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc6c599c827559f1eb0b001f4a132109b004ae3d12851ac2e2327492a323e44
3
+ size 4968
utils/helpers.py CHANGED
@@ -195,6 +195,9 @@ def add_study_files_to_chromadb(file_path: str, collection_name: str):
195
  print(f"File '{file_path}' not found.")
196
  return
197
 
 
 
 
198
  # Get or create the collection in ChromaDB
199
  collection = chromadb_client.get_or_create_collection(collection_name)
200
 
 
195
  print(f"File '{file_path}' not found.")
196
  return
197
 
198
+ if not study_files_data:
199
+ return
200
+
201
  # Get or create the collection in ChromaDB
202
  collection = chromadb_client.get_or_create_collection(collection_name)
203