Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,26 +5,35 @@ from typing import List
|
|
5 |
from unified_document_processor import UnifiedDocumentProcessor, CustomEmbeddingFunction
|
6 |
import chromadb
|
7 |
from chromadb.config import Settings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
class StreamlitDocProcessor:
|
10 |
def __init__(self):
    """Prepare the per-session state this app needs.

    Ensures the ChromaDB data directory exists, builds the document
    processor from the ``GROQ_API_KEY`` secret if the session has none yet,
    and records the set of already processed files. A failure while
    building the processor is shown with ``st.error`` and ends
    initialisation early.
    """
    state = st.session_state

    # The storage location is decided once and then kept in the session.
    if 'CHROMADB_DIR' not in state:
        state.CHROMADB_DIR = os.path.join(os.getcwd(), 'chromadb_data')
        os.makedirs(state.CHROMADB_DIR, exist_ok=True)

    if 'processor' not in state:
        try:
            api_key = st.secrets["GROQ_API_KEY"]
            state.processor = self.initialize_processor(api_key)
        except Exception as e:
            st.error(f"Error initializing processor: {str(e)}")
            return

    if 'processed_files' not in state:
        state.processed_files = self.get_processed_files()
|
28 |
|
29 |
def initialize_processor(self, groq_api_key):
|
30 |
"""Initialize the processor with persistent ChromaDB"""
|
@@ -65,8 +74,10 @@ class StreamlitDocProcessor:
|
|
65 |
def get_processed_files(self) -> set:
|
66 |
"""Get list of processed files from ChromaDB"""
|
67 |
try:
|
68 |
-
|
69 |
-
|
|
|
|
|
70 |
except Exception as e:
|
71 |
st.error(f"Error getting processed files: {str(e)}")
|
72 |
return set()
|
@@ -181,6 +192,10 @@ class StreamlitDocProcessor:
|
|
181 |
st.error(f"Error in Q&A interface: {str(e)}")
|
182 |
|
183 |
def main():
    """Build the Streamlit document processor app and run it."""
    StreamlitDocProcessor().run()
|
186 |
|
|
|
5 |
from unified_document_processor import UnifiedDocumentProcessor, CustomEmbeddingFunction
|
6 |
import chromadb
|
7 |
from chromadb.config import Settings
|
8 |
+
from groq import Groq
|
9 |
+
|
10 |
+
def initialize_session_state():
    """Seed the Streamlit session state with the keys the app relies on.

    Keys that are already present are left untouched.

    * ``CHROMADB_DIR`` -- ``<cwd>/chromadb_data``; the directory is created
      at the moment the key is first set.
    * ``processed_files`` -- starts as an empty set.
    * ``processor`` -- starts as ``None``; the real processor is built by
      ``StreamlitDocProcessor``.
    """
    state = st.session_state

    if 'CHROMADB_DIR' not in state:
        state.CHROMADB_DIR = os.path.join(os.getcwd(), 'chromadb_data')
        os.makedirs(state.CHROMADB_DIR, exist_ok=True)

    if 'processed_files' not in state:
        state.processed_files = set()

    # No try/except here: storing a None placeholder has no failure to
    # report, and an "Error initializing processor" message would mislead.
    if 'processor' not in state:
        state.processor = None  # Will be initialized in StreamlitDocProcessor
|
24 |
|
25 |
class StreamlitDocProcessor:
|
26 |
def __init__(self):
    """Create the document processor once per session and cache it.

    When the session has no processor yet, reads ``GROQ_API_KEY`` from
    ``st.secrets``, builds the processor with ``initialize_processor`` and
    stores it in ``st.session_state.processor``; then refreshes
    ``st.session_state.processed_files`` from ``get_processed_files``.
    Any exception raised along the way is shown with ``st.error`` rather
    than propagated.
    """
    # .get() treats a missing key the same as None, so constructing the
    # class before the session state has been seeded does not raise
    # AttributeError.
    if st.session_state.get('processor') is not None:
        return

    try:
        groq_api_key = st.secrets["GROQ_API_KEY"]
        # Initialize processor with persistent ChromaDB
        st.session_state.processor = self.initialize_processor(groq_api_key)
        # Update processed files after initializing processor
        st.session_state.processed_files = self.get_processed_files()
    except Exception as e:
        st.error(f"Error initializing processor: {str(e)}")
|
|
|
|
|
|
|
37 |
|
38 |
def initialize_processor(self, groq_api_key):
|
39 |
"""Initialize the processor with persistent ChromaDB"""
|
|
|
74 |
def get_processed_files(self) -> set:
    """Return the files the session's processor reports as available.

    The result combines the ``'pdf'`` and ``'xml'`` entries of
    ``processor.get_available_files()``. An empty set is returned when the
    session holds no processor, or when anything raises (the error is
    shown with ``st.error``).
    """
    try:
        processor = st.session_state.processor
        if not processor:
            return set()
        files_by_type = processor.get_available_files()
        return set(files_by_type['pdf'] + files_by_type['xml'])
    except Exception as e:
        st.error(f"Error getting processed files: {str(e)}")
        return set()
|
|
|
192 |
st.error(f"Error in Q&A interface: {str(e)}")
|
193 |
|
194 |
def main():
    """Entry point: seed the session state, then build and run the app."""
    # Session keys must exist before the app object is constructed.
    initialize_session_state()

    StreamlitDocProcessor().run()
|
201 |
|