Spaces:
Sleeping
Sleeping
8bitnand
committed on
Commit
·
c575b59
1
Parent(s):
8b6196b
returning the query itself, IDK why
Browse files
model.py
CHANGED
@@ -37,12 +37,11 @@ class RAGModel:
|
|
37 |
|
38 |
context = "_ " + "\n-".join(c for c in topk_items)
|
39 |
|
40 |
-
base_prompt = f"""
|
41 |
-
Give time for yourself to read the context and then answer the query.
|
42 |
Do not return thinking process, just return the answer.
|
43 |
If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
|
44 |
Now use the following context items to answer the user query.
|
45 |
-
{context}.
|
46 |
user query : {query}
|
47 |
"""
|
48 |
|
@@ -56,7 +55,6 @@ class RAGModel:
|
|
56 |
def answer_query(self, query: str, topk_items: list[str]):
|
57 |
|
58 |
prompt = self.create_prompt(query, topk_items)
|
59 |
-
print(prompt)
|
60 |
input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
|
61 |
output = self.model.generate(**input_ids, max_new_tokens=512)
|
62 |
text = self.tokenizer.decode(output[0])
|
@@ -67,12 +65,13 @@ class RAGModel:
|
|
67 |
if __name__ == "__main__":
|
68 |
|
69 |
configs = load_configs(config_file="rag.configs.yml")
|
70 |
-
query = "what is
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
37 |
|
38 |
context = "_ " + "\n-".join(c for c in topk_items)
|
39 |
|
40 |
+
base_prompt = f"""Give time for yourself to read the context and then answer the query.
|
|
|
41 |
Do not return thinking process, just return the answer.
|
42 |
If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
|
43 |
Now use the following context items to answer the user query.
|
44 |
+
context: {context}.
|
45 |
user query : {query}
|
46 |
"""
|
47 |
|
|
|
55 |
def answer_query(self, query: str, topk_items: list[str]):
|
56 |
|
57 |
prompt = self.create_prompt(query, topk_items)
|
|
|
58 |
input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
|
59 |
output = self.model.generate(**input_ids, max_new_tokens=512)
|
60 |
text = self.tokenizer.decode(output[0])
|
|
|
65 |
if __name__ == "__main__":
|
66 |
|
67 |
configs = load_configs(config_file="rag.configs.yml")
|
68 |
+
query = "what is computer vision"
|
69 |
+
g = GoogleSearch(query)
|
70 |
+
data = g.all_page_data
|
71 |
+
d = Document(data, 512)
|
72 |
+
doc_chunks = d.doc()
|
73 |
+
s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
|
74 |
+
topk, u = s.semantic_search(query=query, k=32)
|
75 |
+
r = RAGModel(configs)
|
76 |
+
output = r.answer_query(query=query, topk_items=topk)
|
77 |
+
print(output)
|
search.py
CHANGED
@@ -34,12 +34,11 @@ class GoogleSearch:
|
|
34 |
for link in sublist
|
35 |
if len(link) > 0
|
36 |
]
|
37 |
-
|
38 |
return links
|
39 |
|
40 |
def read_url_page(self, url: str) -> str:
|
41 |
|
42 |
-
print(url)
|
43 |
response = requests.get(url, headers=self.headers)
|
44 |
response.raise_for_status()
|
45 |
soup = BeautifulSoup(response.text, "html.parser")
|
@@ -136,7 +135,7 @@ class SemanticSearch:
|
|
136 |
)
|
137 |
|
138 |
def semantic_search(self, query: str, k: int = 10):
|
139 |
-
print("
|
140 |
query_embeding = self.get_embeding(query)
|
141 |
doc_embeding = self.get_embeding(self.doc_chunks)
|
142 |
scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
|
@@ -147,20 +146,3 @@ class SemanticSearch:
|
|
147 |
def get_embeding(self, text: Union[list[str], str]):
|
148 |
en = self.st.encode(text)
|
149 |
return en
|
150 |
-
|
151 |
-
|
152 |
-
if __name__ == "__main__":
|
153 |
-
|
154 |
-
query = "what is LLM"
|
155 |
-
g = GoogleSearch(query)
|
156 |
-
data = g.all_page_data
|
157 |
-
# d = Document(data, 333)
|
158 |
-
# doc_chunks = d.doc()
|
159 |
-
# s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
|
160 |
-
# topk, u = s.semantic_search(query, k=64)
|
161 |
-
# print(len(topk))
|
162 |
-
# print(topk, u)
|
163 |
-
|
164 |
-
# g = GoogleSearch("what is LLM")
|
165 |
-
# d = Document(g.all_page_data)
|
166 |
-
# print(len(d.doc()[0]))
|
|
|
34 |
for link in sublist
|
35 |
if len(link) > 0
|
36 |
]
|
37 |
+
|
38 |
return links
|
39 |
|
40 |
def read_url_page(self, url: str) -> str:
|
41 |
|
|
|
42 |
response = requests.get(url, headers=self.headers)
|
43 |
response.raise_for_status()
|
44 |
soup = BeautifulSoup(response.text, "html.parser")
|
|
|
135 |
)
|
136 |
|
137 |
def semantic_search(self, query: str, k: int = 10):
|
138 |
+
print("Searching Top k in document...")
|
139 |
query_embeding = self.get_embeding(query)
|
140 |
doc_embeding = self.get_embeding(self.doc_chunks)
|
141 |
scores = util.dot_score(a=query_embeding, b=doc_embeding)[0]
|
|
|
146 |
def get_embeding(self, text: Union[list[str], str]):
|
147 |
en = self.st.encode(text)
|
148 |
return en
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|