Mattral committed on
Commit
0b8e4b8
Β·
verified Β·
1 Parent(s): 97cea22

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gradio_pdf import PDF
3
+ from qdrant_client import models, QdrantClient
4
+ from sentence_transformers import SentenceTransformer
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.callbacks.manager import CallbackManager
8
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
9
+
10
+ from langchain.vectorstores import Qdrant
11
+ from qdrant_client.http import models
12
+ from ctransformers import AutoModelForCausalLM
13
+
14
# ---------------------------------------------------------------------------
# One-time model setup (executed at import time, before the UI launches).
# ---------------------------------------------------------------------------

# Embedding model used to vectorize both PDF chunks and user queries.
# Loading the embedding model
encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
print("Embedding model loaded...")

# Loading the LLM
# NOTE(review): callback_manager is created but never passed to the model
# below — ctransformers' AutoModelForCausalLM does not accept LangChain
# callback managers. Looks like dead code; confirm before removing.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Quantized Llama-2 7B chat model (GGUF, Q3_K_S) loaded via ctransformers.
# temperature/repetition_penalty/max_new_tokens tune generation behavior.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q3_K_S.gguf",
    model_type="llama",
    temperature=0.2,
    repetition_penalty=1.5,
    max_new_tokens=300,
)

print("LLM loaded...")
31
+
32
def chat(files, question):
    """Answer *question* with retrieval-augmented generation over *files*.

    Extracts text from the uploaded PDFs, chunks and embeds it into a local
    Qdrant collection (rebuilt on every call), retrieves the 3 chunks most
    similar to the question, and asks the Llama-2 model to answer from them.

    Args:
        files: list of PDF file paths (as delivered by ``gr.File``).
        question: the user's free-text question.

    Returns:
        The LLM's answer string.
    """

    def _get_chunks(text):
        # Small overlapping chunks so each fits the embedding model and
        # several fit together in the LLM prompt.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=250,
            chunk_overlap=50,
            length_function=len,
        )
        return splitter.split_text(text)

    all_chunks = []
    for pdf_path in files:
        reader = PdfReader(pdf_path)
        # extract_text() may return None (e.g. image-only pages) — guard
        # with "" so the join never raises TypeError.
        text = "".join((page.extract_text() or "") for page in reader.pages)
        all_chunks.extend(_get_chunks(text))

    print(f"Total chunks: {len(all_chunks)}")
    print("Chunks are ready...")

    # Embedded (path-based) Qdrant. The client MUST be closed before this
    # function returns: a path-based client holds a lock on ./db, so a
    # second call would otherwise fail to open the same storage.
    client = QdrantClient(path="./db")
    print("DB created...")
    try:
        # Rebuild the collection from scratch each call so answers only
        # reflect the PDFs uploaded in this request.
        client.recreate_collection(
            collection_name="my_facts",
            vectors_config=models.VectorParams(
                size=encoder.get_sentence_embedding_dimension(),
                distance=models.Distance.COSINE,
            ),
        )
        print("Collection created...")

        client.upload_records(
            collection_name="my_facts",
            records=[
                models.Record(
                    id=idx,
                    vector=encoder.encode(chunk).tolist(),
                    # Fixed payload key so retrieval below reads it directly
                    # instead of guessing at per-index key names.
                    payload={"text": chunk},
                )
                for idx, chunk in enumerate(all_chunks)
            ],
        )
        print("Records uploaded...")

        # Retrieve the three chunks most similar to the question.
        hits = client.search(
            collection_name="my_facts",
            query_vector=encoder.encode(question).tolist(),
            limit=3,
        )
        context = " ".join(hit.payload["text"] for hit in hits)
    finally:
        client.close()

    # Llama-2 chat prompt format: [INST] <<SYS>> system <</SYS>> user [/INST]
    system_prompt = """You are a helpful co-worker, you will use the provided context to answer user questions.
    Read the given context before answering questions and think step by step. If you cannot answer a user question based on
    the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

    B_INST, E_INST = "[INST]", "[/INST]"
    B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

    SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS

    instruction = f"""
    Context: {context}
    User: {question}"""

    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    print(prompt_template)
    return llm(prompt_template)
117
+
118
# ---------------------------------------------------------------------------
# Gradio UI: multiple-PDF upload plus a question box in, an answer box out.
# ---------------------------------------------------------------------------
pdf_input = gr.File(label="Upload PDFs", file_count="multiple")
question_input = gr.Textbox(lines=10, placeholder="Enter your question here 👉")
answer_output = gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀")

screen = gr.Interface(
    fn=chat,
    inputs=[pdf_input, question_input],
    outputs=answer_output,
    title="Q&A with PDFs 👩🏻‍💻📓✍🏻💡",
    description="This app facilitates a conversation with PDFs uploaded💡",
    theme="soft",
)

screen.launch()