File size: 3,118 Bytes
1e8c35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import os
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re
import pdfplumber
# The Together client used below is constructed without an explicit key, so it
# presumably picks its credential up from the TOGETHER_API_KEY environment
# variable. Provide that variable through the deployment's secret store or the
# shell environment instead of committing it to source control.
# NOTE(review): a literal key used to be hard-coded here; treat that key as
# leaked and revoke/rotate it.
if not os.environ.get('TOGETHER_API_KEY'):
    import warnings

    warnings.warn(
        "TOGETHER_API_KEY is not set; requests to the Together API will fail "
        "until it is provided via the environment.",
        RuntimeWarning,
    )

# Build the context string handed to the LLM from the first `max_pages` pages
# of the PDF; each page's text is followed by a newline.
max_pages = 16
with pdfplumber.open("/content/New Data Set.pdf") as pdf:
    # extract_text() may yield None/empty for a page with no extractable text
    # (e.g. a scanned image); fall back to "" so concatenation cannot raise
    # TypeError.
    text = "".join(
        (page.extract_text() or "") + "\n"
        for page in pdf.pages[:max_pages]
    )

def _clean_response(raw):
    """Tidy raw model output into a single-line answer.

    - Removes the word "assistant" only where it looks like a leaked chat-role
      header (at the very start, or directly followed by a line break / end of
      text), so ordinary uses such as "teaching assistant" are preserved.
    - Collapses immediately repeated words ("the the" -> "the") while leaving
      legitimate later re-use of a word untouched.
    - Normalises whitespace and makes sure a non-empty answer ends with
      terminal punctuation.
    """
    without_markers = re.sub(
        r"^\s*assistant\b[:\s]*|assistant[ \t]*(?:\n+|$)", " ", raw
    )

    collapsed = []
    for word in without_markers.split():
        # Only drop a word when it repeats the one right before it.
        if collapsed and collapsed[-1].lower() == word.lower():
            continue
        collapsed.append(word)

    cleaned = " ".join(collapsed)
    if cleaned and not cleaned.endswith((".", "!", "?")):
        cleaned += "."
    return cleaned


def Bot(Questions):
    """Answer a question using the module-level document `text` as context.

    Args:
        Questions: The user's question as a string.

    Returns:
        The cleaned model answer, or a string starting with
        "Error in generating response:" if building or invoking the chain
        raises.
    """
    chat_template = """
    Based on the provided context: {text}
    Please answer the following question: {Questions}

    Only provide answers that are directly related to the context. If the question is unrelated, respond with "I don't know".
    """
    prompt = PromptTemplate(
        input_variables=['text', 'Questions'],
        template=chat_template
    )

    try:
        # Constructed inside the try so configuration problems (e.g. a missing
        # API key) are reported the same way as invocation failures.
        llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=50)
        Generated_chat = LLMChain(llm=llama3, prompt=prompt)

        response = Generated_chat.invoke({
            "text": text,
            "Questions": Questions
        })
        return _clean_response(response['text'])
    except Exception as e:
        return f"Error in generating response: {e}"

def ChatBot(Questions):
    """Gradio entry point: greet the user or forward the question to `Bot`.

    Args:
        Questions: Raw text typed by the user.

    Returns:
        A fixed greeting when the message starts with a greeting word, a short
        prompt when the message is blank, otherwise the answer from `Bot`
        flattened onto a single line.
    """
    greetings = ("hi", "hello", "hey", "greetings", "what's up", "howdy")
    question_lower = (Questions or "").lower().strip()

    # Nothing to ask: avoid spending an LLM call on an empty prompt.
    if not question_lower:
        return "Please enter a question about the document."

    # Require a word boundary after the greeting so that questions such as
    # "History of ..." or "High ..." are not mistaken for "hi".
    greeting_pattern = r"(?:" + "|".join(re.escape(g) for g in greetings) + r")\b"
    if re.match(greeting_pattern, question_lower):
        return "Hello! How can I assist you with the document today?"

    response = Bot(Questions)
    # Replace line breaks with a single space instead of deleting them, so the
    # words on either side of a break are not glued together.
    return re.sub(r"\s*\n\s*", " ", response).strip()
  # text_embedding = model.encode(text, convert_to_tensor=True)
  # statement_embedding = model.encode(statement, convert_to_tensor=True)

  # # Compute the cosine similarity between the embeddings
  # similarity = util.pytorch_cos_sim(text_embedding, statement_embedding)

  # # Print the similarity score
  # print(f"Cosine similarity: {similarity.item()}")

  # # Define a threshold for considering the statement as related
  # threshold = 0.7

  # if similarity.item() > threshold:
  #   response=Bot(Questions)
  #   return response
  # else:
  #   response="The statement is not related to the text."
  #   return response

# Single text-in / text-out Gradio UI backed by ChatBot.
iface = gr.Interface(fn=ChatBot, inputs="text", outputs="text", title="Chatbot")

# Start the server only when executed as a script, so importing this module
# (e.g. from a test) does not launch a blocking web server.
if __name__ == "__main__":
    iface.launch(debug=True)