Spaces:
Sleeping
Sleeping
File size: 1,976 Bytes
f390c59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import streamlit as st
import os
from dotenv import load_dotenv
from utils import (
extract_text_from_pdf,
build_hierarchical_tree,
save_tree,
hybrid_retrieval,
rag_answer,
)
# Load variables from a .env file, then read the OpenAI key from the
# environment (os.getenv returns None when the variable is not set).
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Create the data directories up front; exist_ok=True makes this a no-op
# for directories that are already present.
for _dir_name in ("uploaded_textbooks", "hierarchical_trees", "retrieved_contexts"):
    os.makedirs(_dir_name, exist_ok=True)
# Page header: app title followed by a one-line description.
_page_title = "Hierarchical Question-Answering System 📚🤖"
_page_blurb = "Upload textbooks, explore their structure, and ask questions powered by AI."
st.title(_page_title)
st.markdown(_page_blurb)
# Upload PDF section
#
# Streamlit re-executes this script from the top on every widget interaction
# (including each question typed further down). To avoid re-extracting and
# re-indexing every uploaded PDF on each rerun, uploads that were already
# handled in this session are remembered in st.session_state and skipped.
uploaded_files = st.file_uploader("Upload Textbooks (PDF)", type=["pdf"], accept_multiple_files=True)
if uploaded_files:
    if "processed_uploads" not in st.session_state:
        st.session_state["processed_uploads"] = set()
    processed_uploads = st.session_state["processed_uploads"]

    for uploaded_file in uploaded_files:
        # The file name is supplied by the client; keep only its final path
        # component so it cannot point outside the target directories.
        safe_name = os.path.basename(uploaded_file.name)

        # (name, size) identifies an upload well enough to detect that the
        # same file is simply being seen again on a rerun.
        upload_key = (safe_name, uploaded_file.size)
        if upload_key in processed_uploads:
            st.success(f"Processed and indexed: {uploaded_file.name}")
            continue

        file_path = os.path.join("uploaded_textbooks", safe_name)
        with open(file_path, "wb") as f:
            # getvalue() returns the complete contents regardless of the
            # buffer's current read position.
            f.write(uploaded_file.getvalue())

        # Extract text
        st.write(f"Processing: {uploaded_file.name}")
        extracted_text = extract_text_from_pdf(file_path)

        # Build hierarchical tree and persist it next to the other trees
        tree = build_hierarchical_tree(extracted_text, textbook_title=uploaded_file.name)
        tree_path = os.path.join("hierarchical_trees", f"{safe_name}_tree.json")
        save_tree(tree, tree_path)

        # Mark as done only after every step succeeded, so a failed upload
        # is retried on the next rerun.
        processed_uploads.add(upload_key)
        st.success(f"Processed and indexed: {uploaded_file.name}")
# Query Section
#
# Reads a question from the user, retrieves supporting text with
# hybrid_retrieval, and, when something was retrieved, asks rag_answer to
# produce an answer which is shown together with the retrieved context.
query = st.text_input("Ask a question:")

# A whitespace-only entry is truthy but is not a real question; strip it so
# it does not trigger a retrieval call.
question = (query or "").strip()
if question:
    if not OPENAI_API_KEY:
        # os.getenv returned None/empty: report it here instead of passing a
        # missing key on to the retrieval and answer helpers.
        st.error("OPENAI_API_KEY is not set. Add it to your .env file or environment to ask questions.")
    else:
        st.write("Retrieving relevant information...")
        relevant_text = hybrid_retrieval(question, OPENAI_API_KEY)
        if relevant_text:
            st.write("Generating an answer...")
            answer = rag_answer(question, relevant_text, OPENAI_API_KEY)
            st.write(f"**Answer:** {answer}")
            st.write("**Relevant Context:**")
            st.write(relevant_text)
        else:
            st.write("No relevant information found.")
|