jordyvl committed on
Commit 11182a0
1 Parent(s): 31eef4a

GPU enabled - small bug fix for LLM

Files changed (1): app.py (+7, -12)
app.py CHANGED
@@ -128,14 +128,11 @@ default_query_engine = load_RAG_pipeline(config)
 
 # These are placeholder functions to simulate the behavior of the RAG setup.
 # You would need to implement these with the actual logic to retrieve and generate answers based on the document.
-def get_answer(question, config, query_engine=default_query_engine):
+def get_answer(question, query_engine=default_query_engine):
     # Here you should implement the logic to generate an answer based on the question and the document.
     # For example, you could use a machine learning model for RAG.
     # answer = "This is a placeholder answer."
     # https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
-
-    # if temperature or nucleus sampling or max_tokens != as in config, recall query engine
-
     response = query_engine.query(question)
     print(f"A: {response}")
     return response
@@ -153,8 +150,6 @@ def get_answer_page(response):
 # Create the gr.Interface function
 def ask_my_thesis(
     question,
-    LLM=config["LLM"],
-    embeddings=config["embeddings"],
     similarity_top_k=config["similarity_top_k"],
     context_window=config["context_window"],
     max_new_tokens=config["max_new_tokens"],
@@ -173,8 +168,8 @@ def ask_my_thesis(
         temperature != config["temperature"]
         or top_p != config["top_p"]
         or max_new_tokens != config["max_new_tokens"]
-        or LLM != config["LLM"]
-        or embeddings != config["embeddings"]
+        # or LLM != config["LLM"]
+        # or embeddings != config["embeddings"]
         or similarity_top_k != config["similarity_top_k"]
         or context_window != config["context_window"]
         or top_k != config["top_k"]
@@ -185,7 +180,7 @@ def ask_my_thesis(
         config["temperature"] = temperature
         config["top_p"] = top_p
         config["max_new_tokens"] = max_new_tokens
-        config["LLM"] = LLM
+        # config["LLM"] = LLM
         # config["embeddings"] = embeddings
         config["similarity_top_k"] = similarity_top_k
         config["context_window"] = context_window
@@ -213,11 +208,11 @@ additional_inputs = [
     # gr.Input("text", label="Question"),
     # gr.Input("text", label="LLM", value=config["LLM"]),
     # gr.Input("text", label="Embeddings", value=config["embeddings"]),
-    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
+    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K", step=1),
     gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
-    gr.Slider(20, 250, value=config["max_new_tokens"], label="Max New Tokens"),
+    gr.Slider(20, 500, value=config["max_new_tokens"], label="Max New Tokens"),
     gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
-    gr.Slider(1, 10, value=config["top_k"], label="Top K"),
+    gr.Slider(1, 10, value=config["top_k"], label="Top K", step=1),
     gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
     gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
     gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
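
The commit drops LLM and embeddings from the set of parameters that can change at query time: ask_my_thesis now rebuilds the query engine only when a retrieval or decoding setting differs from the cached config, while the LLM and embedding model stay fixed at startup, presumably because swapping them would mean reloading model weights on the GPU. A minimal sketch of that rebuild-on-change pattern follows; load_RAG_pipeline and config are the objects defined earlier in app.py, and the helper name maybe_rebuild_query_engine is illustrative, not code from the repository.

# Sketch only: reuse the existing engine unless a tunable setting changed.
# load_RAG_pipeline and config come from app.py; the helper name is made up.
TUNABLE_KEYS = (
    "similarity_top_k", "context_window", "max_new_tokens",
    "temperature", "top_k", "top_p", "chunk_size", "chunk_overlap",
)

def maybe_rebuild_query_engine(config, query_engine, **overrides):
    changed = {
        key: value
        for key, value in overrides.items()
        if key in TUNABLE_KEYS and config.get(key) != value
    }
    if not changed:
        return query_engine           # nothing differs from the cached settings
    config.update(changed)            # persist the new settings
    return load_RAG_pipeline(config)  # rebuild retrieval + generation once

Rebuilding only when something actually changed keeps the common case, repeated questions with unchanged settings, from paying the pipeline construction cost on every call.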
 
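The slider edits are the "small bug fix" from the commit message: step=1 on the Similarity Top K and Top K sliders restricts them to whole numbers (retrieval depth and top-k sampling only make sense as integers), and the Max New Tokens ceiling goes from 250 to 500. A self-contained sketch of how such sliders are typically wired into a Gradio interface is below; the answer_fn stub, the default slider values, and the layout are illustrative, since the real app takes its defaults from config and routes questions through the query engine.

import gradio as gr

# Sketch only: stub standing in for the app's real RAG call.
def answer_fn(question, similarity_top_k, context_window, max_new_tokens,
              temperature, top_k, top_p, chunk_size, chunk_overlap):
    # step=1 keeps the UI from sending fractional values such as 3.5 for
    # parameters that must be whole numbers; casting is a cheap safety net.
    similarity_top_k, top_k = int(similarity_top_k), int(top_k)
    return f"stub answer for: {question}"

demo = gr.Interface(
    fn=answer_fn,
    inputs=[
        gr.Textbox(label="Question"),
        gr.Slider(1, 5, value=3, label="Similarity Top K", step=1),
        gr.Slider(512, 8048, value=2048, label="Context Window"),
        gr.Slider(20, 500, value=250, label="Max New Tokens"),
        gr.Slider(0, 1, value=0.7, label="Temperature"),
        gr.Slider(1, 10, value=5, label="Top K", step=1),
        gr.Slider(0, 1, value=0.95, label="Nucleus Sampling"),
        gr.Slider(128, 4024, value=1024, label="Chunk Size"),
        gr.Slider(0, 200, value=50, label="Chunk Overlap"),
    ],
    outputs=gr.Textbox(label="Answer"),
)

if __name__ == "__main__":
    demo.launch()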