import gradio as gr
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from huggingface_hub import InferenceClient

# Sentence-transformer model used to embed queries for the vector store.
embeddings = SentenceTransformerEmbeddings(
    model_name="msmarco-distilbert-base-v4",
)

# Chroma vector store read from the local "embeddings" directory.
db = Chroma(
    persist_directory="embeddings",
    embedding_function=embeddings,
)

# Hugging Face inference client bound to the Mixtral instruct model.
client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
)


def _build_prompt(message, matching_docs):
    """Return the instruction prompt for *message* given the retrieved documents.

    When *matching_docs* is empty the prompt asks the model to politely say
    that no information is available; otherwise every document's
    ``page_content`` is embedded, numbered from 1, as answer context.
    """
    # NOTE(review): these prompts carry hand-written "<s>[INST] ... [/INST]</s>"
    # markers, yet they are sent through the chat-completion API as ordinary
    # message content. Confirm the backend does not apply its own chat template
    # on top, which would duplicate the markers.
    if not matching_docs:
        return (
            "<s>[INST] You are an expert in generating responses when there is no information available. "
            "Unfortunately, there are no relevant documents available to answer the following query:\n\n"
            f"Query: {message}\n\n"
            "Please provide a polite and original response to inform the user that the requested information is not "
            "available.[/INST]</s>"
        )

    context = "".join(
        f"Document {number}:\n{doc.page_content}\n\n"
        for number, doc in enumerate(matching_docs, start=1)
    )
    # Each sentence ends with a space so the concatenated literals do not run
    # together (previously "...correct.Never..." and "...unprofessional.Please...").
    return (
        "<s>[INST] You are an expert in summarizing and answering questions based on given documents. "
        "You're an expert in English grammar at the same time. "
        "This means that your texts are flawless, correct and grammatically correct. "
        "Never write in the output response what document the response is in. It looks very unprofessional. "
        "Please provide a detailed and well-explained answer to the following query in 4-6 sentences:\n\n"
        f"Query: {message}\n\n"
        f"Based on the following documents:\n{context}\n\n"
        "Answer:[/INST]</s>"
    )


def respond(
        message,
        history: list[tuple[str, str]],
):
    """Stream an answer to *message* grounded in documents from the vector store.

    Args:
        message: The user's latest query.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            entries in a pair are skipped.

    Yields:
        The response text accumulated so far, once per non-empty streamed
        token, so the caller can display a progressively growing answer.
    """
    messages = []
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    matching_docs = db.similarity_search(message)
    messages.append({"role": "user", "content": _build_prompt(message, matching_docs)})

    response = ""
    # The loop variable is deliberately not called `message`, so the query
    # parameter is not shadowed while streaming.
    for chunk in client.chat_completion(
            messages,
            max_tokens=250,
            stream=True,
            temperature=0.7,
            top_p=0.95,
    ):
        # A streamed chunk may carry no choices, or a delta without text
        # (content is None); appending that would raise, so skip it.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue

        response += token
        yield response


# Chat UI whose handler is the streaming `respond` generator defined above.
demo = gr.ChatInterface(fn=respond, title="Boost.space Docs LLM")

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()