from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr

# Load environment variables
load_dotenv()

models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    # "NousResearch/Yarn-Mistral-7b-64k",  ## 14GB>10GB
    # "impira/layoutlm-document-qa",  ## ERR
    # "Qwen/Qwen1.5-7B",      ## 15GB
    # "Qwen/Qwen2.5-3B",      ## high response time
    # "google/gemma-2-2b-jpn-it",   ## high response time
    # "impira/layoutlm-invoices",   ## bad req
    # "google/pix2struct-docvqa-large",  ## bad req
    "mistralai/Mistral-7B-Instruct-v0.2",
    # "google/gemma-7b-it", ## 17GB > 10GB
    # "google/gemma-2b-it",  ## high response time
    # "HuggingFaceH4/zephyr-7b-beta",   ## high response time
    # "HuggingFaceH4/zephyr-7b-gemma-v0.1",     ## bad req
    # "microsoft/phi-2",    ## high response time
    # "TinyLlama/TinyLlama-1.1B-Chat-v1.0",     ## high response time
    # "mosaicml/mpt-7b-instruct",     ## 13GB>10GB
    "tiiuae/falcon-7b-instruct",
    # "google/flan-t5-xxl" ## high respons time
    # "NousResearch/Yarn-Mistral-7b-128k",  ## 14GB>10GB
    # "Qwen/Qwen2.5-7B-Instruct",     ## 15GB>10GB
]

# Global variable for selected model
selected_model_name = models[0]  # Default to the first model in the list

# Initialize the parser
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
# Define file extractor with various common extensions
file_extractor = {
    '.pdf': parser,  # PDF documents
    '.docx': parser,  # Microsoft Word documents
    '.doc': parser,  # Older Microsoft Word documents
    '.txt': parser,  # Plain text files
    '.csv': parser,  # Comma-separated values files
    '.xlsx': parser,  # Microsoft Excel files (requires additional processing for tables)
    '.pptx': parser,  # Microsoft PowerPoint files (for slides)
    '.html': parser,  # HTML files (web pages)
    # '.rtf': parser,  # Rich Text Format files
    # '.odt': parser,  # OpenDocument Text files
    # '.epub': parser,  # ePub files (e-books)

    # Image files for OCR processing
    '.jpg': parser,  # JPEG images
    '.jpeg': parser,  # JPEG images
    '.png': parser,  # PNG images
    # '.bmp': parser,  # Bitmap images
    # '.tiff': parser,  # TIFF images
    # '.tif': parser,  # TIFF images (alternative extension)
    # '.gif': parser,  # GIF images (can contain text)

    # Scanned documents in image formats
    '.webp': parser,  # WebP images
    '.svg': parser,  # SVG files (vector format, may contain embedded text)
}

# Embedding model and index initialization (to be populated by uploaded files)
# embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")   ## Works good
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en")    ## works good
# embed_model2 = HuggingFaceEmbedding(model_name="NeuML/pubmedbert-base-embeddings") ## works good

# sentence-transformers/distilbert-base-nli-mean-tokens
# BAAI/bge-large-en
# embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Global variable to store documents loaded from user-uploaded files
vector_index = None


# File processing function
def load_files(file_path: str):
    try:
        global vector_index
        document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"Ready to give response on {filename}"
    except Exception as e:
        return f"An error occurred: {e}"


# Function to handle the selected model from dropdown
def set_model(selected_model):
    global selected_model_name
    selected_model_name = selected_model  # Update the global variable
    # print(f"Model selected: {selected_model_name}")
    # return f"Model set to: {selected_model_name}"


# Respond function that uses the globally set selected model
def respond(message, history):
    try:
        # Initialize the LLM with the selected model
        llm = HuggingFaceInferenceAPI(
            model_name=selected_model_name,
            contextWindow = 4096,
            maxTokens = 4096,
            temperature=0.7,
            topP=0.95,
            # token=os.getenv("TOKEN")
        )

        # Check selected model
        # print(f"Using model: {selected_model_name}")

        # Set up the query engine with the selected LLM
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = query_engine.query(message)

        print(f"\n{datetime.now()}:{selected_model_name}:: {message} --> {str(bot_message)}\n")
        return f"{selected_model_name}:\n{str(bot_message)}"
    except Exception as e:
        if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
            return "Please upload a file."
        return f"An error occurred: {e}"


# UI Setup
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
            with gr.Row():
                clear = gr.ClearButton()
                btn = gr.Button("Submit", variant='primary')
            output = gr.Text(label='Vector Index')
            model_dropdown = gr.Dropdown(models, label="Step-2: Select Model", interactive=True)

        with gr.Column(scale=3):
            gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=500),
                textbox=gr.Textbox(placeholder="Step-3: Ask me questions on the uploaded document!", container=False, scale=7)
            )

    # Set up Gradio interactions
    model_dropdown.change(fn=set_model, inputs=model_dropdown)
    btn.click(fn=load_files, inputs=file_input, outputs=output)
    clear.click(lambda: [None] * 2, outputs=[file_input, output])

# Launch the demo with a public link option
if __name__ == "__main__":
    demo.launch()