GPU enabled - small bug fix for LLM
app.py CHANGED
@@ -128,14 +128,11 @@ default_query_engine = load_RAG_pipeline(config)
 
 # These are placeholder functions to simulate the behavior of the RAG setup.
 # You would need to implement these with the actual logic to retrieve and generate answers based on the document.
-def get_answer(question,
+def get_answer(question, query_engine=default_query_engine):
     # Here you should implement the logic to generate an answer based on the question and the document.
     # For example, you could use a machine learning model for RAG.
     # answer = "This is a placeholder answer."
     # https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
-
-    # if temperature or nucleus sampling or max_tokens != as in config, recall query engine
-
     response = query_engine.query(question)
     print(f"A: {response}")
     return response
@@ -153,8 +150,6 @@ def get_answer_page(response):
 # Create the gr.Interface function
 def ask_my_thesis(
     question,
-    LLM=config["LLM"],
-    embeddings=config["embeddings"],
     similarity_top_k=config["similarity_top_k"],
     context_window=config["context_window"],
     max_new_tokens=config["max_new_tokens"],
@@ -173,8 +168,8 @@ def ask_my_thesis(
         temperature != config["temperature"]
         or top_p != config["top_p"]
         or max_new_tokens != config["max_new_tokens"]
-        or LLM != config["LLM"]
-        or embeddings != config["embeddings"]
+        # or LLM != config["LLM"]
+        # or embeddings != config["embeddings"]
         or similarity_top_k != config["similarity_top_k"]
         or context_window != config["context_window"]
         or top_k != config["top_k"]
@@ -185,7 +180,7 @@ def ask_my_thesis(
         config["temperature"] = temperature
         config["top_p"] = top_p
         config["max_new_tokens"] = max_new_tokens
-        config["LLM"] = LLM
+        # config["LLM"] = LLM
         # config["embeddings"] = embeddings
         config["similarity_top_k"] = similarity_top_k
         config["context_window"] = context_window
@@ -213,11 +208,11 @@ additional_inputs = [
     # gr.Input("text", label="Question"),
     # gr.Input("text", label="LLM", value=config["LLM"]),
     # gr.Input("text", label="Embeddings", value=config["embeddings"]),
-    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
+    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K", step=1),
     gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
-    gr.Slider(20,
+    gr.Slider(20, 500, value=config["max_new_tokens"], label="Max New Tokens"),
     gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
-    gr.Slider(1, 10, value=config["top_k"], label="Top K"),
+    gr.Slider(1, 10, value=config["top_k"], label="Top K", step=1),
     gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
     gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
     gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
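For reference, the pattern this commit relies on (keep all generation settings in config and rebuild the query engine only when a slider value differs from what is stored) can be sketched roughly as below. This is a minimal illustration, not the code in app.py: the helper name rebuild_query_engine_if_changed is hypothetical, and it assumes the load_RAG_pipeline(config) factory visible in the first hunk header returns a LlamaIndex query engine.

def rebuild_query_engine_if_changed(config, query_engine, temperature, top_p, max_new_tokens):
    # Compare the requested sampling settings against the stored config,
    # mirroring the comparison made in ask_my_thesis.
    if (
        temperature != config["temperature"]
        or top_p != config["top_p"]
        or max_new_tokens != config["max_new_tokens"]
    ):
        # Persist the new settings and recreate the RAG pipeline with them.
        config["temperature"] = temperature
        config["top_p"] = top_p
        config["max_new_tokens"] = max_new_tokens
        query_engine = load_RAG_pipeline(config)
    return query_engine

With the LLM and embeddings checks commented out by this change, switching models would presumably go through the LlamaIndex configuration mechanism referenced in the get_answer comment (the local, per-component overrides described at the linked docs page) rather than through function arguments.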