Update pipeline.py
pipeline.py CHANGED (+45 -13)
@@ -10,6 +10,7 @@ from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
 import subprocess  # Import subprocess to run shell commands
 from langchain.llms.base import LLM  # Import LLM
+from mistralai import Mistral  # Import Mistral for moderation
 
 # Import the chain builders from our separate files
 from classification_chain import get_classification_chain
@@ -22,6 +23,11 @@ if not os.environ.get("GEMINI_API_KEY"):
     os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")
 if not os.environ.get("GROQ_API_KEY"):
     os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API Key: ")
+if not os.environ.get("MISTRAL_API_KEY"):
+    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter your Mistral API Key: ")
+
+# Initialize Mistral client
+mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
 
 # 2) Load spaCy model for NER and download the spaCy model if not already installed
 def install_spacy_model():
@@ -67,7 +73,28 @@ def extract_main_topic(query: str) -> str:
     # Return the extracted topic or a fallback value if no topic is found
     return main_topic if main_topic else "this topic"
 
-# 3)
+# 3) Function to moderate text using Mistral moderation API
+def moderate_text(query: str) -> str:
+    """
+    Classifies the query as harmful or not using Mistral Moderation API.
+    Returns "OutOfScope" if harmful, otherwise returns the original query.
+    """
+    response = mistral_client.classifiers.moderate(
+        model="mistral-moderation-latest",
+        inputs=[query]
+    )
+
+    categories = response.results[0].categories
+
+    # Check if any harmful category is flagged
+    if categories.get("violence_and_threats", False) or \
+       categories.get("hate_and_discrimination", False) or \
+       categories.get("dangerous_and_criminal_content", False) or \
+       categories.get("selfharm", False):
+        return "OutOfScope"
+    return query
+
+# 4) build_or_load_vectorstore (no changes)
 def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     if os.path.exists(store_dir):
         print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
@@ -96,7 +123,7 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     vectorstore.save_local(store_dir)
     return vectorstore
 
-#
+# 5) Build RAG chain for Gemini (no changes)
 def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     class GeminiLangChainLLM(LLM):
         def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
@@ -117,13 +144,13 @@ def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     )
     return rag_chain
 
-#
+# 6) Initialize all the separate chains
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()  # Refusal chain will now use dynamic topic
 tailor_chain = get_tailor_chain()
 cleaner_chain = get_cleaner_chain()
 
-#
+# 7) Build our vectorstores + RAG chains
 wellness_csv = "AIChatbot.csv"
 brand_csv = "BrandAI.csv"
 wellness_store_dir = "faiss_wellness_store"
@@ -136,7 +163,7 @@ gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("
 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
 
-#
+# 8) Tools / Agents for web search (no changes)
 search_tool = DuckDuckGoSearchTool()
 web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
 managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
@@ -148,19 +175,24 @@ def do_web_search(query: str) -> str:
     response = manager_agent.run(search_query)
     return response
 
-#
+# 9) Orchestrator: run_with_chain
 def run_with_chain(query: str) -> str:
     print("DEBUG: Starting run_with_chain...")
 
-    # 1)
-    class_result = classification_chain.invoke({"query": query})
+    # 1) Moderate the query for harmful content
+    moderated_query = moderate_text(query)
+    if moderated_query == "OutOfScope":
+        return "Sorry, this query contains harmful or inappropriate content."
+
+    # 2) Classify the query
+    class_result = classification_chain.invoke({"query": moderated_query})
     classification = class_result.get("text", "").strip()
     print("DEBUG: Classification =>", classification)
 
     # If OutOfScope => refusal => tailor => return
     if classification == "OutOfScope":
         # Extract the main topic for the refusal message
-        topic = extract_main_topic(query)
+        topic = extract_main_topic(moderated_query)
         print("DEBUG: Extracted Topic =>", topic)
 
         # Pass the extracted topic to the refusal chain
@@ -170,14 +202,14 @@ def run_with_chain(query: str) -> str:
 
     # If Wellness => wellness RAG => if insufficient => web => unify => tailor
     if classification == "Wellness":
-        rag_result = wellness_rag_chain({"query": query})
+        rag_result = wellness_rag_chain({"query": moderated_query})
         csv_answer = rag_result["result"].strip()
         if not csv_answer:
-            web_answer = do_web_search(query)
+            web_answer = do_web_search(moderated_query)
         else:
             lower_ans = csv_answer.lower()
             if any(phrase in lower_ans for phrase in ["i do not know", "not sure", "no context", "cannot answer"]):
-                web_answer = do_web_search(query)
+                web_answer = do_web_search(moderated_query)
             else:
                 web_answer = ""
         final_merged = cleaner_chain.merge(kb=csv_answer, web=web_answer)
@@ -186,7 +218,7 @@ def run_with_chain(query: str) -> str:
 
     # If Brand => brand RAG => tailor => return
     if classification == "Brand":
-        rag_result = brand_rag_chain({"query": query})
+        rag_result = brand_rag_chain({"query": moderated_query})
         csv_answer = rag_result["result"].strip()
         final_merged = cleaner_chain.merge(kb=csv_answer, web="")
         final_answer = tailor_chain.run({"response": final_merged})