import os

import gradio as gr
import pdfplumber
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_together import Together
# Supply your own Together API key; never commit a real key to source control.
os.environ['TOGETHER_API_KEY'] = "YOUR_TOGETHER_API_KEY"

# Extract text from the first `max_pages` pages of the source PDF.
text = ""
max_pages = 16
with pdfplumber.open("New Data Set.pdf") as pdf:
    for i, page in enumerate(pdf.pages):
        if i >= max_pages:
            break
        # extract_text() can return None for pages without a text layer.
        text += (page.extract_text() or "") + "\n"

def Bot(Questions):
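    """Answer `Questions` strictly from the extracted PDF text via a Together-hosted Llama 3 model."""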
    chat_template = """
    Based on the provided context: {text}
    Please answer the following question: {Questions}

    Only provide answers that are directly related to the context. If the question is unrelated, respond with "I don't know".
    """
    prompt = PromptTemplate(
        input_variables=['text', 'Questions'],
        template=chat_template
    )
    # Together-hosted Llama 3 70B chat model; max_tokens=50 keeps answers short.
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=50)
    Generated_chat = LLMChain(llm=llama3, prompt=prompt)

    try:
        response = Generated_chat.invoke({
            "text": text,
            "Questions": Questions
        })

        # LLMChain.invoke returns a dict; the generation lives under 'text'.
        response_text = response['text']

        # Strip stray role tokens the model sometimes echoes.
        response_text = response_text.replace("assistant", "")

        # Post-processing: drop any word that has already appeared (case-insensitively)
        # to suppress the model's occasional repetition. Note this removes *every*
        # repeated word, so it can alter phrasing.
        seen = set()
        filtered_words = []
        for word in response_text.split():
            if word.lower() not in seen:
                seen.add(word.lower())
                filtered_words.append(word)
        response_text = ' '.join(filtered_words).strip()
        # Ensure the answer ends with a period.
        if not response_text.endswith('.'):
            response_text += '.'

        return response_text
    except Exception as e:
        return f"Error in generating response: {e}"
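
# Example call (assumes a valid TOGETHER_API_KEY is set):
#     Bot("What is the main topic of the document?")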

def ChatBot(Questions):
    """Route greetings to a canned reply; answer everything else from the PDF."""
    greetings = ["hi", "hello", "hey", "greetings", "what's up", "howdy"]
    # Check whether the input is, or starts with, a greeting.
    question_lower = Questions.lower().strip()
    if question_lower in greetings or any(question_lower.startswith(greeting) for greeting in greetings):
        return "Hello! How can I assist you with the document today?"
    # Otherwise answer from the document; strip newlines for single-line display.
    response = Bot(Questions)
    return response.translate(str.maketrans('', '', '\n'))

# Alternative relevance gate kept from an earlier draft (not wired into ChatBot):
# embed the document and the question, and only answer when the two are
# semantically related. A minimal sketch, assuming the sentence-transformers
# package; the model name 'all-MiniLM-L6-v2' is chosen for illustration only.
#
# from sentence_transformers import SentenceTransformer, util
#
# model = SentenceTransformer('all-MiniLM-L6-v2')
#
# def is_related(document_text, question, threshold=0.7):
#     # Compute the cosine similarity between the two embeddings.
#     text_embedding = model.encode(document_text, convert_to_tensor=True)
#     question_embedding = model.encode(question, convert_to_tensor=True)
#     similarity = util.pytorch_cos_sim(text_embedding, question_embedding)
#     print(f"Cosine similarity: {similarity.item()}")
#     # Treat the question as on-topic only above the threshold.
#     return similarity.item() > threshold
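# If the gate above were enabled, ChatBot could consult it before calling Bot(), e.g.:
#     if not is_related(text, Questions):
#         return "The statement is not related to the text."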

# Minimal Gradio UI: a single text input mapped to a text output.
iface = gr.Interface(fn=ChatBot, inputs="text", outputs="text", title="Chatbot")
iface.launch(debug=True)