Rehman1603 commited on
Commit
1e8c35b
1 Parent(s): 0d8e0fc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import pytube
from youtube_transcript_api import YouTubeTranscriptApi as yt
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import os
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re
import pdfplumber

# SECURITY(review): a live API key is committed to source control. Rotate this
# key and load it from the environment / a secrets store instead of the repo.
os.environ['TOGETHER_API_KEY'] = "d88cb7414e4039a84d2ed63f1b47daaaa4230c4c53a422045d8a30a9a3bc87d8"

# Extract the reference text that every chatbot answer is grounded in.
# NOTE(review): the path is Colab-specific ("/content/...") — confirm the file
# exists in the deployment environment.
text = ""
max_pages = 16  # only the first 16 pages are used as context
with pdfplumber.open("/content/New Data Set.pdf") as pdf:
    for i, page in enumerate(pdf.pages):
        if i >= max_pages:
            break
        # extract_text() returns None for pages with no extractable text
        # (e.g. scanned images); guard so such a page does not raise
        # "TypeError: can only concatenate str" and abort startup.
        page_text = page.extract_text()
        if page_text:
            text += page_text + "\n"
23
def Bot(Questions):
    """Answer *Questions* using the PDF-derived module-level ``text`` as context.

    Builds a context-restricted prompt, runs it through a Llama-3-70b chat
    model via Together, then cleans the raw completion: strips the literal
    word "assistant", removes case-insensitively repeated words, trims
    whitespace, and guarantees a trailing period.

    Returns the cleaned answer string, or an error string if anything in the
    LLM call / post-processing fails.
    """
    chat_template = """
    Based on the provided context: {text}
    Please answer the following question: {Questions}

    Only provide answers that are directly related to the context. If the question is unrelated, respond with "I don't know".
    """
    prompt = PromptTemplate(
        input_variables=['text', 'Questions'],
        template=chat_template,
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=50)
    qa_chain = LLMChain(llm=llama3, prompt=prompt)

    try:
        result = qa_chain.invoke({
            "text": text,
            "Questions": Questions,
        })

        answer = result['text'].replace("assistant", "")

        # Suppress the model's tendency to repeat itself: keep only the first
        # (case-insensitive) occurrence of each word.
        seen_words = set()
        deduped = []
        for word in answer.split():
            key = word.lower()
            if key not in seen_words:
                seen_words.add(key)
                deduped.append(word)

        answer = ' '.join(deduped).strip()  # no stray spaces at the ends
        if not answer.endswith('.'):
            answer += '.'

        return answer
    except Exception as e:
        return f"Error in generating response: {e}"
59
+
60
def ChatBot(Questions):
    """Route a user message: answer greetings directly, otherwise delegate to
    Bot() for a context-grounded answer.

    The non-greeting answer has all newlines removed so it renders as a
    single line in the Gradio textbox.
    """
    greetings = ["hi", "hello", "hey", "greetings", "what's up", "howdy"]
    question_lower = Questions.lower().strip()
    # Match a greeting only on a word boundary. The previous prefix check
    # (str.startswith) misfired on real questions such as "history of ..."
    # because they begin with "hi".
    greeting_pattern = r"(?:" + "|".join(re.escape(g) for g in greetings) + r")\b"
    if re.match(greeting_pattern, question_lower):
        return "Hello! How can I assist you with the document today?"
    response = Bot(Questions)
    return response.translate(str.maketrans('', '', '\n'))
69
+ # text_embedding = model.encode(text, convert_to_tensor=True)
70
+ # statement_embedding = model.encode(statement, convert_to_tensor=True)
71
+
72
+ # # Compute the cosine similarity between the embeddings
73
+ # similarity = util.pytorch_cos_sim(text_embedding, statement_embedding)
74
+
75
+ # # Print the similarity score
76
+ # print(f"Cosine similarity: {similarity.item()}")
77
+
78
+ # # Define a threshold for considering the statement as related
79
+ # threshold = 0.7
80
+
81
+ # if similarity.item() > threshold:
82
+ # response=Bot(Questions)
83
+ # return response
84
+ # else:
85
+ # response="The statement is not related to the text."
86
+ # return response
87
+
88
# Expose the chatbot through a simple text-in/text-out Gradio UI.
iface = gr.Interface(
    fn=ChatBot,
    inputs="text",
    outputs="text",
    title="Chatbot",
)
# debug=True keeps the process attached and surfaces server-side errors.
iface.launch(debug=True)
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+