GPU enabled - small bug fix for LLM
app.py CHANGED
@@ -128,14 +128,11 @@ default_query_engine = load_RAG_pipeline(config)
 
 # These are placeholder functions to simulate the behavior of the RAG setup.
 # You would need to implement these with the actual logic to retrieve and generate answers based on the document.
-def get_answer(question,
+def get_answer(question, query_engine=default_query_engine):
     # Here you should implement the logic to generate an answer based on the question and the document.
     # For example, you could use a machine learning model for RAG.
     # answer = "This is a placeholder answer."
     # https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
-
-    # if temperature or nucleus sampling or max_tokens != as in config, recall query engine
-
     response = query_engine.query(question)
     print(f"A: {response}")
     return response
@@ -153,8 +150,6 @@ def get_answer_page(response):
 # Create the gr.Interface function
 def ask_my_thesis(
     question,
-    LLM=config["LLM"],
-    embeddings=config["embeddings"],
     similarity_top_k=config["similarity_top_k"],
     context_window=config["context_window"],
     max_new_tokens=config["max_new_tokens"],
@@ -173,8 +168,8 @@ def ask_my_thesis(
         temperature != config["temperature"]
         or top_p != config["top_p"]
         or max_new_tokens != config["max_new_tokens"]
-        or LLM != config["LLM"]
-        or embeddings != config["embeddings"]
+        # or LLM != config["LLM"]
+        # or embeddings != config["embeddings"]
         or similarity_top_k != config["similarity_top_k"]
         or context_window != config["context_window"]
         or top_k != config["top_k"]
@@ -185,7 +180,7 @@ def ask_my_thesis(
         config["temperature"] = temperature
         config["top_p"] = top_p
         config["max_new_tokens"] = max_new_tokens
-        config["LLM"] = LLM
+        # config["LLM"] = LLM
         # config["embeddings"] = embeddings
         config["similarity_top_k"] = similarity_top_k
         config["context_window"] = context_window
@@ -213,11 +208,11 @@ additional_inputs = [
     # gr.Input("text", label="Question"),
     # gr.Input("text", label="LLM", value=config["LLM"]),
     # gr.Input("text", label="Embeddings", value=config["embeddings"]),
-    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
+    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K", step=1),
     gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
-    gr.Slider(20,
+    gr.Slider(20, 500, value=config["max_new_tokens"], label="Max New Tokens"),
     gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
-    gr.Slider(1, 10, value=config["top_k"], label="Top K"),
+    gr.Slider(1, 10, value=config["top_k"], label="Top K", step=1),
     gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
     gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
     gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
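For reference, the pattern this commit relies on (keep all generation settings in config and rebuild the query engine only when a slider value differs from what is stored) can be sketched roughly as below. This is a minimal illustration, not the code in app.py: the helper name rebuild_query_engine_if_changed is hypothetical, and it assumes the load_RAG_pipeline(config) factory visible in the first hunk header returns a LlamaIndex query engine.

def rebuild_query_engine_if_changed(config, query_engine, temperature, top_p, max_new_tokens):
    # Compare the requested sampling settings against the stored config,
    # mirroring the comparison made in ask_my_thesis.
    if (
        temperature != config["temperature"]
        or top_p != config["top_p"]
        or max_new_tokens != config["max_new_tokens"]
    ):
        # Persist the new settings and recreate the RAG pipeline with them.
        config["temperature"] = temperature
        config["top_p"] = top_p
        config["max_new_tokens"] = max_new_tokens
        query_engine = load_RAG_pipeline(config)
    return query_engine

With the LLM and embeddings checks commented out by this change, switching models would presumably go through the LlamaIndex configuration mechanism referenced in the get_answer comment (the local, per-component overrides described at the linked docs page) rather than through function arguments.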