Spaces:

ErvinYubo
/

TopEdu

Sleeping

App Files Community

Eleanor Zheng committed 29 days ago

Commit

7daf5b1

1 Parent(s): e707907

Separate pages into different files.

Browse files

Files changed (10) hide show

app.py +18 -510
pages/about.py +79 -0
pages/manage_documents.py +69 -0
pages/search_uni.py +88 -0
pages/upload_documents.py +70 -0
styles.css +126 -0
test_system.py +2 -2
utils/display.py +79 -0
rag_system.py → utils/rag_system.py +0 -0
utils/translations.py +100 -0

app.py CHANGED Viewed

@@ -1,9 +1,25 @@
 import streamlit as st
 import os
 from urllib.parse import urlparse, parse_qs
-from rag_system import DocumentIngestion, RAGSystem, save_query_result, load_shared_query
 from datetime import datetime
 import uuid
 # Configure Streamlit page
 st.set_page_config(
@@ -13,138 +29,6 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# Custom CSS - Dark theme compatible
-st.markdown("""
-<style>
-    .main-header {
-        text-align: center;
-        padding: 2rem 0;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        margin: -1rem -1rem 2rem -1rem;
-        border-radius: 10px;
-        box-shadow: 0 4px 15px 0 rgba(31, 38, 135, 0.37);
-    }
-    .stApp {
-        background: var(--background-color);
-    }
-    /* Dark theme compatible containers */
-    .query-result {
-        background: rgba(255, 255, 255, 0.05);
-        backdrop-filter: blur(10px);
-        border: 1px solid rgba(255, 255, 255, 0.1);
-        padding: 1.5rem;
-        border-radius: 15px;
-        margin: 1rem 0;
-        color: var(--text-color);
-    }
-    .source-doc {
-        background: rgba(31, 119, 180, 0.1);
-        backdrop-filter: blur(5px);
-        padding: 1rem;
-        border-left: 4px solid #1f77b4;
-        border-radius: 8px;
-        margin: 0.5rem 0;
-        color: var(--text-color);
-    }
-    .share-link {
-        background: rgba(46, 204, 113, 0.1);
-        backdrop-filter: blur(5px);
-        padding: 1rem;
-        border-radius: 10px;
-        border-left: 4px solid #2ecc71;
-        color: var(--text-color);
-    }
-    /* Model indicator boxes */
-    .model-info {
-        background: rgba(52, 152, 219, 0.15);
-        backdrop-filter: blur(10px);
-        padding: 15px;
-        border-radius: 12px;
-        border-left: 4px solid #3498db;
-        margin: 10px 0;
-    }
-    /* Language selection enhancement */
-    .language-selection {
-        background: rgba(155, 89, 182, 0.1);
-        backdrop-filter: blur(10px);
-        padding: 15px;
-        border-radius: 12px;
-        border-left: 4px solid #9b59b6;
-        margin: 10px 0;
-    }
-    /* Upload area enhancement */
-    .stFileUploader {
-        background: rgba(230, 126, 34, 0.1);
-        backdrop-filter: blur(10px);
-        padding: 20px;
-        border-radius: 15px;
-        border: 2px dashed #e67e22;
-    }
-    .stFileUploader label {
-        font-size: 1.2rem;
-        font-weight: bold;
-        color: var(--text-color);
-    }
-    /* Button enhancements */
-    .stButton > button {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        border: none;
-        border-radius: 10px;
-        padding: 0.6rem 1.5rem;
-        font-weight: 600;
-        transition: all 0.3s ease;
-        box-shadow: 0 4px 15px 0 rgba(31, 38, 135, 0.37);
-    }
-    .stButton > button:hover {
-        transform: translateY(-2px);
-        box-shadow: 0 6px 20px 0 rgba(31, 38, 135, 0.5);
-    }
-    /* Sidebar enhancements */
-    .css-1d391kg {
-        background: rgba(255, 255, 255, 0.02);
-        backdrop-filter: blur(10px);
-    }
-    /* Info boxes */
-    .stInfo {
-        background: rgba(52, 152, 219, 0.1);
-        backdrop-filter: blur(10px);
-        border-left: 4px solid #3498db;
-    }
-    .stSuccess {
-        background: rgba(46, 204, 113, 0.1);
-        backdrop-filter: blur(10px);
-        border-left: 4px solid #2ecc71;
-    }
-    .stWarning {
-        background: rgba(241, 196, 15, 0.1);
-        backdrop-filter: blur(10px);
-        border-left: 4px solid #f1c40f;
-    }
-    .stError {
-        background: rgba(231, 76, 60, 0.1);
-        backdrop-filter: blur(10px);
-        border-left: 4px solid #e74c3c;
-    }
-</style>
-""", unsafe_allow_html=True)
 def main():
     # Check for shared query in URL
     query_params = st.query_params
@@ -182,388 +66,12 @@ def main():
         upload_documents_page()
     elif st.session_state.current_page == "🗂 Manage Documents":
         manage_documents_page()
-    elif st.session_state.current_page == "ℹ️ About":
         about_page()
     else:
         search_page()
-def upload_documents_page():
-    st.header("📄 Upload University Documents")
-    st.write("Upload PDF documents containing university admission requirements, fees, and program information.")
-    col1, col2 = st.columns(2)
-    with col1:
-        university_name = st.text_input("🏫 University Name", placeholder="e.g., National University of Singapore")
-        country = st.selectbox(
-            "🌏 Country",
-            ["", "Singapore", "Malaysia", "Thailand", "Indonesia", "Philippines", "Vietnam", "Brunei", "Cambodia", "Laos", "Myanmar"]
-        )
-    with col2:
-        document_type = st.selectbox(
-            "📋 Document Type",
-            ["admission_requirements", "tuition_fees", "program_catalog", "application_guide", "scholarship_info"]
-        )
-        language = st.selectbox(
-            "🌐 Primary Language",
-            ["English", "Chinese", "Malay", "Thai", "Indonesian", "Vietnamese", "Filipino", "Other"]
-        )
-    # File upload
-    uploaded_files = st.file_uploader(
-        "Choose PDF files",
-        accept_multiple_files=True,
-        type=['pdf'],
-        help="Select one or more PDF files to upload"
-    )
-    if uploaded_files and st.button("🚀 Process Documents", type="primary"):
-        if not university_name or not country:
-            st.error("Please provide university name and country.")
-            return
-        with st.spinner("Processing documents... This may take a few minutes."):
-            try:
-                # Initialize document ingestion
-                doc_ingestion = DocumentIngestion()
-                # Process documents
-                documents = doc_ingestion.process_documents(
-                    uploaded_files, university_name, country, document_type
-                )
-                if documents:
-                    # Create or update vector store
-                    vectorstore = doc_ingestion.create_vector_store(documents)
-                    if vectorstore:
-                        st.success(f"✅ Successfully processed {len(documents)} documents!")
-                        st.info(f"Documents from {university_name} ({country}) have been added to the knowledge base.")
-                        # Show processed files
-                        with st.expander("📋 Processed Files"):
-                            for doc in documents:
-                                st.write(f"• **{doc.metadata['source']}**")
-                                st.write(f"  - University: {doc.metadata['university']}")
-                                st.write(f"  - Country: {doc.metadata['country']}")
-                                st.write(f"  - Type: {doc.metadata['document_type']}")
-                                st.write("---")
-                else:
-                    st.error("No documents were successfully processed.")
-            except Exception as e:
-                st.error(f"Error processing documents: {str(e)}")
-def manage_documents_page():
-    st.header("🗂 Manage Documents in Database")
-    st.write("View and delete documents currently stored in the Chroma vector database.")
-    from rag_system import DocumentIngestion
-    doc_ingestion = DocumentIngestion()
-    vectorstore = doc_ingestion.load_existing_vectorstore()
-    if not vectorstore:
-        st.warning("No vector store found. Upload documents first.")
-        return
-    # Get all documents (chunks) in the vectorstore
-    try:
-        # Chroma stores documents as chunks, but we want to show original metadata
-        # We'll group by file_id to show unique documents
-        collection = vectorstore._collection
-        all_docs = collection.get(include=["metadatas", "documents"])  # Removed 'ids'
-        metadatas = all_docs["metadatas"]
-        ids = all_docs["ids"]  # ids are always returned
-        documents = all_docs["documents"]
-        # Group by file_id
-        doc_map = {}
-        for meta, doc_id, doc_text in zip(metadatas, ids, documents):
-            file_id = meta.get("file_id", doc_id)
-            if file_id not in doc_map:
-                doc_map[file_id] = {
-                    "source": meta.get("source", "Unknown"),
-                    "university": meta.get("university", "Unknown"),
-                    "country": meta.get("country", "Unknown"),
-                    "document_type": meta.get("document_type", "Unknown"),
-                    "upload_timestamp": meta.get("upload_timestamp", "Unknown"),
-                    "file_id": file_id,
-                    "chunks": []
-                }
-            doc_map[file_id]["chunks"].append(doc_text)
-        if not doc_map:
-            st.info("No documents found in the database.")
-            return
-        st.subheader("Current Documents:")
-        for file_id, info in doc_map.items():
-            with st.expander(f"{info['source']} ({info['university']}, {info['country']})"):
-                st.write(f"**Type:** {info['document_type']}")
-                st.write(f"**Uploaded:** {info['upload_timestamp']}")
-                st.write(f"**File ID:** {file_id}")
-                st.write(f"**Chunks:** {len(info['chunks'])}")
-                if st.button(f"🗑️ Delete Document", key=f"del_{file_id}"):
-                    # Delete all chunks with this file_id
-                    ids_to_delete = [doc_id for meta, doc_id in zip(metadatas, ids) if meta.get("file_id", doc_id) == file_id]
-                    vectorstore._collection.delete(ids=ids_to_delete)
-                    st.success(f"Deleted document: {info['source']}")
-                    st.rerun()
-        # Add Delete All button
-        if doc_map:
-            if st.button("🗑️ Delete All Documents", key="del_all_docs", type="secondary"):
-                all_ids = list(ids)
-                vectorstore._collection.delete(ids=all_ids)
-                st.success("All documents deleted.")
-                st.rerun()
-    except Exception as e:
-        st.error(f"Error loading documents: {str(e)}")
-def search_page():
-    st.header("🔍 Search University Information")
-    # --- Language selection ---
-    col1, col2 = st.columns([3, 1])
-    with col1:
-        st.write("Ask about admissions, fees, scholarships, and programs.")
-    with col2:
-        response_language = st.selectbox(
-            "Language",
-            ["English", "中文 (Chinese)", "Bahasa Malaysia", "ไทย (Thai)",
-             "Bahasa Indonesia", "Tiếng Việt (Vietnamese)"],
-            key="response_language"
-        )
-    language_map = {
-        "English": "English",
-        "中文 (Chinese)": "Chinese",
-        "Bahasa Malaysia": "Malay",
-        "ไทย (Thai)": "Thai",
-        "Bahasa Indonesia": "Indonesian",
-        "Tiếng Việt (Vietnamese)": "Vietnamese"
-    }
-    selected_lang = language_map[response_language]
-    if selected_lang != "English":
-        st.info(f"🌐 Responses will be in **{selected_lang}**")
-    # --- Query input ---
-    query = st.text_area(
-        "Your question:",
-        height=80,
-        placeholder="e.g., Master's in Malaysia under 40,000 RMB/year",
-    )
-    # --- Example queries ---
-    with st.expander("💡 See Example Queries"):
-        tab1, tab2 = st.tabs(["🧠 Complex Queries", "⚡ Simple Queries"])
-        complex_examples = [
-            "Show me universities in Malaysia for master's degrees with tuition under 40,000 RMB per year",
-            "专科毕业，无雅思，想在马来西亚读硕士，学费不超过4万人民币/年",
-            "Compare engineering programs in Thailand and Singapore under $15,000 per year",
-            "Find MBA programs in ASEAN with GMAT requirements and scholarships available"
-        ]
-        simple_examples = [
-            "What does IELTS stand for?",
-            "Translate 'application deadline' to Chinese",
-            "What is the difference between bachelor and master degree?",
-            "How to say 'university' in Thai?"
-        ]
-        for ex in complex_examples:
-            if tab1.button(ex, key=f"complex_{hash(ex)}"):
-                st.session_state.example_query = ex
-        for ex in simple_examples:
-            if tab2.button(ex, key=f"simple_{hash(ex)}"):
-                st.session_state.example_query = ex
-    # --- Use clicked example ---
-    if 'example_query' in st.session_state:
-        query = st.session_state.example_query
-        st.info(f"📝 Using example: {query}")
-        del st.session_state.example_query
-        # Optionally auto-trigger search
-    # --- Optional filters ---
-    with st.expander("🔧 Advanced Filters"):
-        col1, col2, col3 = st.columns(3)
-        budget_range = col1.select_slider(
-            "Budget (USD/year)",
-            options=["Any", "<10k", "10k-20k", "20k-30k", "30k-40k", ">40k"],
-            value="Any"
-        )
-        study_level = col2.multiselect(
-            "Study Level", ["Diploma", "Bachelor", "Master", "PhD"]
-        )
-        preferred_countries = col3.multiselect(
-            "Preferred Countries",
-            ["Singapore", "Malaysia", "Thailand", "Indonesia", "Philippines", "Vietnam", "Brunei"]
-        )
-    # --- Search button ---
-    if st.button("🔍 Search", type="primary", disabled=not query.strip()):
-        st.success("Searching...")  # Placeholder for your RAGSystem logic
-def display_query_result(result, show_share_link=False):
-    """Display query results in a formatted way."""
-    st.markdown('<div class="query-result">', unsafe_allow_html=True)
-    # Show which model was used
-    if result.get("model_used"):
-        st.info(f"🤖 **Model Used:** {result['model_used']}")
-    st.subheader("🎯 Answer")
-    st.write(result["answer"])
-    # Share link
-    if show_share_link and result.get("query_id"):
-        st.markdown("---")
-        current_url = st.get_option("browser.serverAddress") or "localhost:8501"
-        share_url = f"http://{current_url}?share={result['query_id']}"
-        st.markdown(f"""
-        <div class="share-link">
-            <strong>🔗 Share this result:</strong><br>
-            <code>{share_url}</code>
-        </div>
-        """, unsafe_allow_html=True)
-        if st.button("📋 Copy Share Link"):
-            st.code(share_url)
-    # Source documents
-    if result.get("source_documents"):
-        st.markdown("---")
-        st.subheader("📚 Sources")
-        for i, doc in enumerate(result["source_documents"], 1):
-            with st.expander(f"Source {i}: {doc.metadata.get('source', 'Unknown')}"):
-                col1, col2 = st.columns([1, 2])
-                with col1:
-                    st.write(f"**University:** {doc.metadata.get('university', 'Unknown')}")
-                    st.write(f"**Country:** {doc.metadata.get('country', 'Unknown')}")
-                    st.write(f"**Type:** {doc.metadata.get('document_type', 'Unknown')}")
-                with col2:
-                    st.write("**Relevant Content:**")
-                    content_preview = doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content
-                    st.write(content_preview)
-    st.markdown('</div>', unsafe_allow_html=True)
-def display_shared_query(query_id):
-    """Display a shared query result."""
-    st.header("🔗 Shared Query Result")
-    result_data = load_shared_query(query_id)
-    if result_data:
-        st.info(f"**Original Question:** {result_data['question']}")
-        st.write(f"**Language:** {result_data['language']}")
-        st.write(f"**Date:** {result_data['timestamp'][:10]}")
-        # Create a mock result object for display
-        mock_result = {
-            "answer": result_data["answer"],
-            "source_documents": [
-                type('MockDoc', (), {
-                    'metadata': source,
-                    'page_content': source.get('content_preview', '')
-                })() for source in result_data.get('sources', [])
-            ]
-        }
-        display_query_result(mock_result, show_share_link=False)
-        if st.button("🔍 Ask Your Own Question"):
-            st.experimental_set_query_params()
-            st.experimental_rerun()
-    else:
-        st.error("❌ Shared query not found or has expired.")
-        if st.button("🏠 Go to Home"):
-            st.experimental_set_query_params()
-            st.experimental_rerun()
-def about_page():
-    st.header("ℹ️ About PanSea University Search")
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        st.markdown("""
-        ### 🎯 Problem We Solve
-        Prospective students worldwide seeking to study abroad face difficulty finding accurate, up-to-date university admission requirements. Information is scattered across PDFs, brochures, and outdated agency websites. Many waste time applying to unsuitable programs due to missing criteria and pay high agent fees.
-        ### 💡 Our Solution
-        PanSea is an LLM-powered, RAG-based study search platform powered by **SEA-LION models** that ingests official admissions documents from ASEAN universities. Students can query in any ASEAN language and receive:
-        - 📋 **Ranked program matches** with detailed requirements
-        - 💰 **Tuition fees and costs**
-        - 📅 **Application deadlines and windows**
-        - 🎓 **Entry requirements and prerequisites**
-        - 📖 **Source citations** from official documents
-        ### 🤖 AI Models Used
-        - **SEA-LION v3.5 Reasoning Model**: For complex university search queries requiring multi-step reasoning
-        - **SEA-LION v3 Instruct Model**: For translation and simple question-answering
-        - **Automatic Model Selection**: The system intelligently chooses the appropriate model based on query complexity
-        ### 🌏 Supported Languages
-        - English
-        - 中文 (Chinese)
-        - Bahasa Malaysia
-        - ไทย (Thai)
-        - Bahasa Indonesia
-        - Tiếng Việt (Vietnamese)
-        - Filipino
-        ### 🔧 How It Works
-        1. **📄 Document Ingestion**: Upload official PDF documents from universities
-        2. **🔍 AI Processing**: Our system processes and indexes the content
-        3. **❓ Natural Language Queries**: Ask questions in your preferred language
-        4. **🎯 Intelligent Answers**: Get relevant, sourced responses
-        5. **🔗 Share Results**: Generate shareable links for your queries
-        """)
-    with col2:
-        st.markdown("""
-        ### 📊 Features
-        ✅ **Multi-language support**
-        ✅ **PDF document ingestion**
-        ✅ **Intelligent search & retrieval**
-        ✅ **Source citations**
-        ✅ **Shareable query results**
-        ✅ **Advanced filtering**
-        ✅ **Real-time processing**
-        ### 🏛️ Target Universities
-        - 🇸🇬 Singapore
-        - 🇲🇾 Malaysia
-        - 🇹🇭 Thailand
-        - 🇮🇩 Indonesia
-        - 🇵🇭 Philippines
-        - 🇻🇳 Vietnam
-        - 🇧🇳 Brunei
-        - 🇰🇭 Cambodia
-        - 🇱🇦 Laos
-        - 🇲🇲 Myanmar
-        ### 🚀 Get Started
-        1. Go to **Upload Documents** to add university PDFs
-        2. Use **Search Universities** to ask questions
-        3. Share your results with others!
-        """)
 if __name__ == "__main__":
     # Check if SEA-LION API key is set
     if not os.getenv("SEA_LION_API_KEY"):

 import streamlit as st
 import os
 from urllib.parse import urlparse, parse_qs
+from utils.rag_system import DocumentIngestion, RAGSystem, save_query_result, load_shared_query
 from datetime import datetime
 import uuid
+from utils.translations import translations
+from pathlib import Path
+from pages.search_uni import search_page
+from pages.upload_documents import upload_documents_page
+from pages.manage_documents import manage_documents_page
+from pages.about import about_page
+from utils.display import display_shared_query
+# Load external CSS
+def load_css(file_name):
+    css_file = Path(file_name)
+    if css_file.exists():
+        with open(css_file) as f:
+            st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
+load_css("styles.css")
 # Configure Streamlit page
 st.set_page_config(
     initial_sidebar_state="expanded"
 )
 def main():
     # Check for shared query in URL
     query_params = st.query_params
         upload_documents_page()
     elif st.session_state.current_page == "🗂 Manage Documents":
         manage_documents_page()
+    elif st.session_state.current_page == "ℹ️ About Top.Edu":
         about_page()
     else:
         search_page()
 if __name__ == "__main__":
     # Check if SEA-LION API key is set
     if not os.getenv("SEA_LION_API_KEY"):

pages/about.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import streamlit as st
+def about_page():
+    st.header("ℹ️ About PanSea University Search")
+    col1, col2 = st.columns([2, 1])
+    with col1:
+        st.markdown("""
+        ### 🎯 Problem We Solve
+        Prospective students worldwide seeking to study abroad face difficulty finding accurate, up-to-date university admission requirements. Information is scattered across PDFs, brochures, and outdated agency websites. Many waste time applying to unsuitable programs due to missing criteria and pay high agent fees.
+        ### 💡 Our Solution
+        PanSea is an LLM-powered, RAG-based study search platform powered by **SEA-LION models** that ingests official admissions documents from ASEAN universities. Students can query in any ASEAN language and receive:
+        - 📋 **Ranked program matches** with detailed requirements
+        - 💰 **Tuition fees and costs**
+        - 📅 **Application deadlines and windows**
+        - 🎓 **Entry requirements and prerequisites**
+        - 📖 **Source citations** from official documents
+        ### 🤖 AI Models Used
+        - **SEA-LION v3.5 Reasoning Model**: For complex university search queries requiring multi-step reasoning
+        - **SEA-LION v3 Instruct Model**: For translation and simple question-answering
+        - **Automatic Model Selection**: The system intelligently chooses the appropriate model based on query complexity
+        ### 🌏 Supported Languages
+        - English
+        - 中文 (Chinese)
+        - Bahasa Malaysia
+        - ไทย (Thai)
+        - Bahasa Indonesia
+        - Tiếng Việt (Vietnamese)
+        - Filipino
+        ### 🔧 How It Works
+        1. **📄 Document Ingestion**: Upload official PDF documents from universities
+        2. **🔍 AI Processing**: Our system processes and indexes the content
+        3. **❓ Natural Language Queries**: Ask questions in your preferred language
+        4. **🎯 Intelligent Answers**: Get relevant, sourced responses
+        5. **🔗 Share Results**: Generate shareable links for your queries
+        """)
+    with col2:
+        st.markdown("""
+        ### 📊 Features
+        ✅ **Multi-language support**
+        ✅ **PDF document ingestion**
+        ✅ **Intelligent search & retrieval**
+        ✅ **Source citations**
+        ✅ **Shareable query results**
+        ✅ **Advanced filtering**
+        ✅ **Real-time processing**
+        ### 🏛️ Target Universities
+        - 🇸🇬 Singapore
+        - 🇲🇾 Malaysia
+        - 🇹🇭 Thailand
+        - 🇮🇩 Indonesia
+        - 🇵🇭 Philippines
+        - 🇻🇳 Vietnam
+        - 🇧🇳 Brunei
+        - 🇰🇭 Cambodia
+        - 🇱🇦 Laos
+        - 🇲🇲 Myanmar
+        ### 🚀 Get Started
+        1. Go to **Upload Documents** to add university PDFs
+        2. Use **Search Universities** to ask questions
+        3. Share your results with others!
+        """)

pages/manage_documents.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import streamlit as st
+from utils.rag_system import DocumentIngestion
+def manage_documents_page():
+    st.header("🗂 Manage Documents in Database")
+    st.write("View and delete documents currently stored in the Chroma vector database.")
+    from utils.rag_system import DocumentIngestion
+    doc_ingestion = DocumentIngestion()
+    vectorstore = doc_ingestion.load_existing_vectorstore()
+    if not vectorstore:
+        st.warning("No vector store found. Upload documents first.")
+        return
+    # Get all documents (chunks) in the vectorstore
+    try:
+        # Chroma stores documents as chunks, but we want to show original metadata
+        # We'll group by file_id to show unique documents
+        collection = vectorstore._collection
+        all_docs = collection.get(include=["metadatas", "documents"])  # Removed 'ids'
+        metadatas = all_docs["metadatas"]
+        ids = all_docs["ids"]  # ids are always returned
+        documents = all_docs["documents"]
+        # Group by file_id
+        doc_map = {}
+        for meta, doc_id, doc_text in zip(metadatas, ids, documents):
+            file_id = meta.get("file_id", doc_id)
+            if file_id not in doc_map:
+                doc_map[file_id] = {
+                    "source": meta.get("source", "Unknown"),
+                    "university": meta.get("university", "Unknown"),
+                    "country": meta.get("country", "Unknown"),
+                    "document_type": meta.get("document_type", "Unknown"),
+                    "upload_timestamp": meta.get("upload_timestamp", "Unknown"),
+                    "file_id": file_id,
+                    "chunks": []
+                }
+            doc_map[file_id]["chunks"].append(doc_text)
+        if not doc_map:
+            st.info("No documents found in the database.")
+            return
+        st.subheader("Current Documents:")
+        for file_id, info in doc_map.items():
+            with st.expander(f"{info['source']} ({info['university']}, {info['country']})"):
+                st.write(f"**Type:** {info['document_type']}")
+                st.write(f"**Uploaded:** {info['upload_timestamp']}")
+                st.write(f"**File ID:** {file_id}")
+                st.write(f"**Chunks:** {len(info['chunks'])}")
+                if st.button(f"🗑️ Delete Document", key=f"del_{file_id}"):
+                    # Delete all chunks with this file_id
+                    ids_to_delete = [doc_id for meta, doc_id in zip(metadatas, ids) if meta.get("file_id", doc_id) == file_id]
+                    vectorstore._collection.delete(ids=ids_to_delete)
+                    st.success(f"Deleted document: {info['source']}")
+                    st.rerun()
+        # Add Delete All button
+        if doc_map:
+            if st.button("🗑️ Delete All Documents", key="del_all_docs", type="secondary"):
+                all_ids = list(ids)
+                vectorstore._collection.delete(ids=all_ids)
+                st.success("All documents deleted.")
+                st.rerun()
+    except Exception as e:
+        st.error(f"Error loading documents: {str(e)}")

pages/search_uni.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import streamlit as st
+def search_page():
+    st.header("🔍 Search University Information")
+    # --- Language selection ---
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        st.write("Ask about admissions, fees, scholarships, and programs.")
+    with col2:
+        response_language = st.selectbox(
+            "Language",
+            ["English", "中文 (Chinese)", "Bahasa Malaysia", "ไทย (Thai)",
+             "Bahasa Indonesia", "Tiếng Việt (Vietnamese)"],
+            key="response_language"
+        )
+    language_map = {
+        "English": "English",
+        "中文 (Chinese)": "中文",
+        "Bahasa Malaysia": "Malay",
+        "ไทย (Thai)": "ไทย",
+        "Bahasa Indonesia": "Indonesian",
+        "Tiếng Việt (Vietnamese)": "Tiếng Việt"
+    }
+    selected_lang = language_map[response_language]
+    if selected_lang != "English":
+        st.info(f"🌐 Responses will be in **{selected_lang}**")
+    # --- Query input ---
+    query = st.text_area(
+        "Your question:",
+        height=80,
+        placeholder="e.g., Master's in Malaysia under 40,000 RMB/year",
+    )
+    # --- Example queries ---
+    with st.expander("💡 See Example Queries"):
+        tab1, tab2 = st.tabs(["🧠 Complex Queries", "⚡ Simple Queries"])
+        complex_examples = [
+            "Show me universities in Malaysia for master's degrees with tuition under 40,000 RMB per year",
+            "专科毕业，无雅思，想在马来西亚读硕士，学费不超过4万人民币/年",
+            "Compare engineering programs in Thailand and Singapore under $15,000 per year",
+            "Find MBA programs in ASEAN with GMAT requirements and scholarships available"
+        ]
+        simple_examples = [
+            "What does IELTS stand for?",
+            "Translate 'application deadline' to Chinese",
+            "What is the difference between bachelor and master degree?",
+            "How to say 'university' in Thai?"
+        ]
+        for ex in complex_examples:
+            if tab1.button(ex, key=f"complex_{hash(ex)}"):
+                st.session_state.example_query = ex
+        for ex in simple_examples:
+            if tab2.button(ex, key=f"simple_{hash(ex)}"):
+                st.session_state.example_query = ex
+    # --- Use clicked example ---
+    if 'example_query' in st.session_state:
+        query = st.session_state.example_query
+        st.info(f"📝 Using example: {query}")
+        del st.session_state.example_query
+        # Optionally auto-trigger search
+    # --- Optional filters ---
+    with st.expander("🔧 Advanced Filters"):
+        col1, col2, col3 = st.columns(3)
+        budget_range = col1.select_slider(
+            "Budget (USD/year)",
+            options=["Any", "<10k", "10k-20k", "20k-30k", "30k-40k", ">40k"],
+            value="Any"
+        )
+        study_level = col2.multiselect(
+            "Study Level", ["Diploma", "Bachelor", "Master", "PhD"]
+        )
+        preferred_countries = col3.multiselect(
+            "Preferred Countries",
+            ["Singapore", "Malaysia", "Thailand", "Indonesia", "Philippines", "Vietnam", "Brunei"]
+        )
+    # --- Search button ---
+    if st.button("🔍 Search", type="primary", disabled=not query.strip()):
+        st.success("Searching...")  # Placeholder for your RAGSystem logic

pages/upload_documents.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import streamlit as st
+from utils.rag_system import DocumentIngestion
+def upload_documents_page():
+    st.header("📄 Upload University Documents")
+    st.write("Upload PDF documents containing university admission requirements, fees, and program information.")
+    col1, col2 = st.columns(2)
+    with col1:
+        university_name = st.text_input("🏫 University Name", placeholder="e.g., National University of Singapore")
+        country = st.selectbox(
+            "🌏 Country",
+            ["", "Singapore", "Malaysia", "Thailand", "Indonesia", "Philippines", "Vietnam", "Brunei", "Cambodia", "Laos", "Myanmar"]
+        )
+    with col2:
+        document_type = st.selectbox(
+            "📋 Document Type",
+            ["admission_requirements", "tuition_fees", "program_catalog", "application_guide", "scholarship_info"]
+        )
+        language = st.selectbox(
+            "🌐 Primary Language",
+            ["English", "Chinese", "Malay", "Thai", "Indonesian", "Vietnamese", "Filipino", "Other"]
+        )
+    # File upload
+    uploaded_files = st.file_uploader(
+        "Choose PDF files",
+        accept_multiple_files=True,
+        type=['pdf'],
+        help="Select one or more PDF files to upload"
+    )
+    if uploaded_files and st.button("🚀 Process Documents", type="primary"):
+        if not university_name or not country:
+            st.error("Please provide university name and country.")
+            return
+        with st.spinner("Processing documents... This may take a few minutes."):
+            try:
+                # Initialize document ingestion
+                doc_ingestion = DocumentIngestion()
+                # Process documents
+                documents = doc_ingestion.process_documents(
+                    uploaded_files, university_name, country, document_type
+                )
+                if documents:
+                    # Create or update vector store
+                    vectorstore = doc_ingestion.create_vector_store(documents)
+                    if vectorstore:
+                        st.success(f"✅ Successfully processed {len(documents)} documents!")
+                        st.info(f"Documents from {university_name} ({country}) have been added to the knowledge base.")
+                        # Show processed files
+                        with st.expander("📋 Processed Files"):
+                            for doc in documents:
+                                st.write(f"• **{doc.metadata['source']}**")
+                                st.write(f"  - University: {doc.metadata['university']}")
+                                st.write(f"  - Country: {doc.metadata['country']}")
+                                st.write(f"  - Type: {doc.metadata['document_type']}")
+                                st.write("---")
+                else:
+                    st.error("No documents were successfully processed.")
+            except Exception as e:
+                st.error(f"Error processing documents: {str(e)}")

styles.css ADDED Viewed

	@@ -0,0 +1,126 @@

+/* Gradient banner used for the page title */
+.main-header {
+  text-align: center;
+  padding: 2rem 0;
+  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+  color: white;
+  margin: -1rem -1rem 2rem -1rem;
+  border-radius: 10px;
+  box-shadow: 0 4px 15px 0 rgba(31, 38, 135, 0.37);
+}
+.stApp {
+  background: var(--background-color);
+}
+/* Dark theme compatible containers */
+.query-result {
+  background: rgba(255, 255, 255, 0.05);
+  backdrop-filter: blur(10px);
+  border: 1px solid rgba(255, 255, 255, 0.1);
+  padding: 1.5rem;
+  border-radius: 15px;
+  margin: 1rem 0;
+  color: var(--text-color);
+}
+.source-doc {
+  background: rgba(31, 119, 180, 0.1);
+  backdrop-filter: blur(5px);
+  padding: 1rem;
+  border-left: 4px solid #1f77b4;
+  border-radius: 8px;
+  margin: 0.5rem 0;
+  color: var(--text-color);
+}
+.share-link {
+  background: rgba(46, 204, 113, 0.1);
+  backdrop-filter: blur(5px);
+  padding: 1rem;
+  border-radius: 10px;
+  border-left: 4px solid #2ecc71;
+  color: var(--text-color);
+}
+/* Model indicator boxes */
+.model-info {
+  background: rgba(52, 152, 219, 0.15);
+  backdrop-filter: blur(10px);
+  padding: 15px;
+  border-radius: 12px;
+  border-left: 4px solid #3498db;
+  margin: 10px 0;
+}
+/* Language selection enhancement */
+.language-selection {
+  background: rgba(155, 89, 182, 0.1);
+  backdrop-filter: blur(10px);
+  padding: 15px;
+  border-radius: 12px;
+  border-left: 4px solid #9b59b6;
+  margin: 10px 0;
+}
+/* Upload area enhancement */
+.stFileUploader {
+  background: rgba(230, 126, 34, 0.1);
+  backdrop-filter: blur(10px);
+  padding: 20px;
+  border-radius: 15px;
+  border: 2px dashed #e67e22;
+}
+.stFileUploader label {
+  font-size: 1.2rem;
+  font-weight: bold;
+  color: var(--text-color);
+}
+/* Button enhancements */
+.stButton > button {
+  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+  color: white;
+  border: none;
+  border-radius: 10px;
+  padding: 0.6rem 1.5rem;
+  font-weight: 600;
+  transition: all 0.3s ease;
+  box-shadow: 0 4px 15px 0 rgba(31, 38, 135, 0.37);
+}
+.stButton > button:hover {
+  transform: translateY(-2px);
+  box-shadow: 0 6px 20px 0 rgba(31, 38, 135, 0.5);
+}
+/* Sidebar enhancements */
+/* .css-1d391kg is an auto-generated class name; the data-testid selector is
+   listed alongside it so the rule does not depend on that name alone. */
+.css-1d391kg,
+[data-testid="stSidebar"] {
+  background: rgba(255, 255, 255, 0.02);
+  backdrop-filter: blur(10px);
+}
+/* Info boxes */
+/* NOTE(review): confirm that these st* class names are emitted by the
+   Streamlit version in use; otherwise the four rules below match nothing. */
+.stInfo {
+  background: rgba(52, 152, 219, 0.1);
+  backdrop-filter: blur(10px);
+  border-left: 4px solid #3498db;
+}
+.stSuccess {
+  background: rgba(46, 204, 113, 0.1);
+  backdrop-filter: blur(10px);
+  border-left: 4px solid #2ecc71;
+}
+.stWarning {
+  background: rgba(241, 196, 15, 0.1);
+  backdrop-filter: blur(10px);
+  border-left: 4px solid #f1c40f;
+}
+.stError {
+  background: rgba(231, 76, 60, 0.1);
+  backdrop-filter: blur(10px);
+  border-left: 4px solid #e74c3c;
+}

test_system.py CHANGED Viewed

@@ -37,7 +37,7 @@ def test_imports():
         return False
     try:
-        from rag_system import DocumentIngestion, RAGSystem
         print("✅ RAG system modules imported successfully")
     except ImportError as e:
         print(f"❌ Failed to import RAG system: {e}")
@@ -107,7 +107,7 @@ def test_basic_functionality():
     print("\n⚡ Testing basic functionality...")
     try:
-        from rag_system import DocumentIngestion, SEALionLLM
         # Test document ingestion initialization
         doc_ingestion = DocumentIngestion()

         return False
     try:
+        from utils.rag_system import DocumentIngestion, RAGSystem
         print("✅ RAG system modules imported successfully")
     except ImportError as e:
         print(f"❌ Failed to import RAG system: {e}")
     print("\n⚡ Testing basic functionality...")
     try:
+        from utils.rag_system import DocumentIngestion, SEALionLLM
         # Test document ingestion initialization
         doc_ingestion = DocumentIngestion()

utils/display.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import streamlit as st
+from utils.rag_system import DocumentIngestion, RAGSystem, save_query_result, load_shared_query
+def display_query_result(result, show_share_link=False):
+    """Display query results in a formatted way."""
+    st.markdown('<div class="query-result">', unsafe_allow_html=True)
+    # Show which model was used
+    if result.get("model_used"):
+        st.info(f"🤖 **Model Used:** {result['model_used']}")
+    st.subheader("🎯 Answer")
+    st.write(result["answer"])
+    # Share link
+    if show_share_link and result.get("query_id"):
+        st.markdown("---")
+        current_url = st.get_option("browser.serverAddress") or "localhost:8501"
+        share_url = f"http://{current_url}?share={result['query_id']}"
+        st.markdown(f"""
+        <div class="share-link">
+            <strong>🔗 Share this result:</strong><br>
+            <code>{share_url}</code>
+        </div>
+        """, unsafe_allow_html=True)
+        if st.button("📋 Copy Share Link"):
+            st.code(share_url)
+    # Source documents
+    if result.get("source_documents"):
+        st.markdown("---")
+        st.subheader("📚 Sources")
+        for i, doc in enumerate(result["source_documents"], 1):
+            with st.expander(f"Source {i}: {doc.metadata.get('source', 'Unknown')}"):
+                col1, col2 = st.columns([1, 2])
+                with col1:
+                    st.write(f"**University:** {doc.metadata.get('university', 'Unknown')}")
+                    st.write(f"**Country:** {doc.metadata.get('country', 'Unknown')}")
+                    st.write(f"**Type:** {doc.metadata.get('document_type', 'Unknown')}")
+                with col2:
+                    st.write("**Relevant Content:**")
+                    content_preview = doc.page_content[:300] + "..." if len(doc.page_content) > 300 else doc.page_content
+                    st.write(content_preview)
+    st.markdown('</div>', unsafe_allow_html=True)
+def display_shared_query(query_id):
+    """Display a shared query result."""
+    st.header("🔗 Shared Query Result")
+    result_data = load_shared_query(query_id)
+    if result_data:
+        st.info(f"**Original Question:** {result_data['question']}")
+        st.write(f"**Language:** {result_data['language']}")
+        st.write(f"**Date:** {result_data['timestamp'][:10]}")
+        # Create a mock result object for display
+        mock_result = {
+            "answer": result_data["answer"],
+            "source_documents": [
+                type('MockDoc', (), {
+                    'metadata': source,
+                    'page_content': source.get('content_preview', '')
+                })() for source in result_data.get('sources', [])
+            ]
+        }
+        display_query_result(mock_result, show_share_link=False)
+        if st.button("🔍 Ask Your Own Question"):
+            st.experimental_set_query_params()
+            st.experimental_rerun()
+    else:
+        st.error("❌ Shared query not found or has expired.")
+        if st.button("🏠 Go to Home"):
+            st.experimental_set_query_params()
+            st.experimental_rerun()

rag_system.py → utils/rag_system.py RENAMED Viewed

File without changes

utils/translations.py ADDED Viewed

	@@ -0,0 +1,100 @@

+# translations.py
+# UI strings for the search page, keyed first by language code and then by
+# message key. Every language entry defines the same set of message keys.
+translations = {
+    # English
+    "en": {
+        "search_header": "🔍 Search University Information",
+        "ask_prompt": "Ask about admissions, fees, scholarships, and programs.",
+        "language_label": "Language",
+        "your_question": "Your question:",
+        "placeholder": "e.g., Master's in Malaysia under 40,000 RMB/year",
+        "example_queries": "💡 See Example Queries",
+        "complex_queries": "🧠 Complex Queries",
+        "simple_queries": "⚡ Simple Queries",
+        "advanced_filters": "🔧 Advanced Filters",
+        "budget_label": "Budget (USD/year)",
+        "study_level": "Study Level",
+        "preferred_countries": "Preferred Countries",
+        "search_button": "🔍 Search",
+        "searching_msg": "Searching..."
+    },
+    # Chinese
+    "zh": {
+        "search_header": "🔍 搜索大学信息",
+        "ask_prompt": "询问入学、学费、奖学金和课程信息。",
+        "language_label": "语言",
+        "your_question": "你的问题：",
+        "placeholder": "例如：在马来西亚读硕士，每年学费低于4万人民币",
+        "example_queries": "💡 查看示例问题",
+        "complex_queries": "🧠 复杂查询",
+        "simple_queries": "⚡ 简单查询",
+        "advanced_filters": "🔧 高级筛选",
+        "budget_label": "预算 (美元/年)",
+        "study_level": "学历层次",
+        "preferred_countries": "首选国家",
+        "search_button": "🔍 搜索",
+        "searching_msg": "正在搜索..."
+    },
+    # Malay
+    "ms": {
+        "search_header": "🔍 Cari Maklumat Universiti",
+        "ask_prompt": "Tanya tentang kemasukan, yuran, biasiswa, dan program.",
+        "language_label": "Bahasa",
+        "your_question": "Soalan anda:",
+        "placeholder": "contoh: Sarjana di Malaysia bawah 40,000 RMB/tahun",
+        "example_queries": "💡 Lihat Contoh Soalan",
+        "complex_queries": "🧠 Soalan Kompleks",
+        "simple_queries": "⚡ Soalan Mudah",
+        "advanced_filters": "🔧 Penapis Lanjutan",
+        "budget_label": "Bajet (USD/tahun)",
+        "study_level": "Peringkat Pengajian",
+        "preferred_countries": "Negara Pilihan",
+        "search_button": "🔍 Cari",
+        "searching_msg": "Sedang mencari..."
+    },
+    # Thai
+    "th": {
+        "search_header": "🔍 ค้นหาข้อมูลมหาวิทยาลัย",
+        "ask_prompt": "สอบถามเกี่ยวกับการรับเข้าเรียน ค่าธรรมเนียม ทุนการศึกษา และหลักสูตร",
+        "language_label": "ภาษา",
+        "your_question": "คำถามของคุณ:",
+        "placeholder": "เช่น ปริญญาโทในมาเลเซียไม่เกิน 40,000 หยวน/ปี",
+        "example_queries": "💡 ดูตัวอย่างคำถาม",
+        "complex_queries": "🧠 คำถามซับซ้อน",
+        "simple_queries": "⚡ คำถามง่าย",
+        "advanced_filters": "🔧 ตัวกรองขั้นสูง",
+        "budget_label": "งบประมาณ (USD/ปี)",
+        "study_level": "ระดับการศึกษา",
+        "preferred_countries": "ประเทศที่ต้องการ",
+        "search_button": "🔍 ค้นหา",
+        "searching_msg": "กำลังค้นหา..."
+    },
+    # Indonesian
+    "id": {
+        "search_header": "🔍 Cari Informasi Universitas",
+        "ask_prompt": "Tanyakan tentang penerimaan, biaya, beasiswa, dan program.",
+        "language_label": "Bahasa",
+        "your_question": "Pertanyaan Anda:",
+        "placeholder": "contoh: Magister di Malaysia di bawah 40,000 RMB/tahun",
+        "example_queries": "💡 Lihat Contoh Pertanyaan",
+        "complex_queries": "🧠 Pertanyaan Kompleks",
+        "simple_queries": "⚡ Pertanyaan Sederhana",
+        "advanced_filters": "🔧 Filter Lanjutan",
+        "budget_label": "Anggaran (USD/tahun)",
+        "study_level": "Tingkat Pendidikan",
+        "preferred_countries": "Negara Pilihan",
+        "search_button": "🔍 Cari",
+        "searching_msg": "Mencari..."
+    },
+    # Vietnamese
+    "vi": {
+        "search_header": "🔍 Tìm kiếm Thông tin Đại học",
+        "ask_prompt": "Hỏi về tuyển sinh, học phí, học bổng và chương trình học.",
+        "language_label": "Ngôn ngữ",
+        "your_question": "Câu hỏi của bạn:",
+        "placeholder": "ví dụ: Thạc sĩ tại Malaysia dưới 40,000 RMB/năm",
+        "example_queries": "💡 Xem Câu hỏi Mẫu",
+        "complex_queries": "🧠 Câu hỏi Phức tạp",
+        "simple_queries": "⚡ Câu hỏi Đơn giản",
+        "advanced_filters": "🔧 Bộ lọc Nâng cao",
+        "budget_label": "Ngân sách (USD/năm)",
+        "study_level": "Trình độ Học vấn",
+        "preferred_countries": "Quốc gia Ưu tiên",
+        "search_button": "🔍 Tìm kiếm",
+        "searching_msg": "Đang tìm kiếm..."
+    }
+}