Update pipeline.py
pipeline.py  +21 -68  CHANGED
@@ -1,5 +1,7 @@
 import os
 import getpass
 import spacy  # Import spaCy for NER functionality
 import pandas as pd
 from typing import Optional
@@ -10,33 +12,18 @@ from langchain.chains import RetrievalQA
 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
 import subprocess  # Import subprocess to run shell commands
 from langchain.llms.base import LLM  # Import LLM
-
-
-#
-
-
-
-
-
-# 1) Environment: set up keys if missing
-if not os.environ.get("GEMINI_API_KEY"):
-    os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")
-if not os.environ.get("GROQ_API_KEY"):
-    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ API Key: ")
-if not os.environ.get("MISTRAL_API_KEY"):
-    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter your Mistral API Key: ")
-
-# Initialize Mistral client
-mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
-
-# 2) Load spaCy model for NER and download the spaCy model if not already installed
 def install_spacy_model():
     try:
-        # Check if the model is already installed
         spacy.load("en_core_web_sm")
         print("spaCy model 'en_core_web_sm' is already installed.")
     except OSError:
-        # If model is not installed, download it using subprocess
         print("Downloading spaCy model 'en_core_web_sm'...")
         subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
         print("spaCy model 'en_core_web_sm' downloaded successfully.")
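# --- Illustrative variant (not part of this commit): the download step above
# --- shells out to a bare "python", which assumes that executable is on PATH.
# --- Using sys.executable ties the download to the running interpreter instead.
import sys
import subprocess
import spacy

def install_spacy_model_for_current_interpreter():
    try:
        spacy.load("en_core_web_sm")
    except OSError:
        subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)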
@@ -47,46 +34,16 @@ install_spacy_model()
 # Load the spaCy model globally
 nlp = spacy.load("en_core_web_sm")
 
-# Function to
-def extract_main_topic(query: str) -> str:
-    """
-    Extracts the main topic from the user's query using spaCy's NER.
-    Returns the first named entity or noun found in the query.
-    """
-    doc = nlp(query)  # Use the globally loaded spaCy model
-
-    # Try to extract the main topic as a named entity (person, product, etc.)
-    main_topic = None
-    for ent in doc.ents:
-        # Filter for specific entity types (you can adjust this based on your needs)
-        if ent.label_ in ["ORG", "PRODUCT", "PERSON", "GPE", "TIME"]:  # Add more entity labels as needed
-            main_topic = ent.text
-            break
-
-    # If no named entity found, fallback to extracting the first noun or proper noun
-    if not main_topic:
-        for token in doc:
-            if token.pos_ in ["NOUN", "PROPN"]:  # Extract first noun or proper noun
-                main_topic = token.text
-                break
-
-    # Return the extracted topic or a fallback value if no topic is found
-    return main_topic if main_topic else "this topic"
-
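# --- Illustrative usage of the removed extract_main_topic() above (not part of
# --- this commit). Whether an entity is found depends on what en_core_web_sm
# --- tags; if nothing matches the listed labels, the first NOUN/PROPN token wins.
topic = extract_main_topic("How do I fine-tune Gemini for a wellness chatbot?")
print(topic)  # e.g. "Gemini" if tagged as an entity, otherwise the first noun/proper noun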
-# 3) Function to moderate text using Mistral moderation API
 def moderate_text(query: str) -> str:
     """
-    Classifies the query as harmful or not using Mistral Moderation
     Returns "OutOfScope" if harmful, otherwise returns the original query.
     """
-    response =
-
-
-
-
-    categories = response.results[0].categories
-
-    # Check if any harmful category is flagged
     if categories.get("violence_and_threats", False) or \
        categories.get("hate_and_discrimination", False) or \
        categories.get("dangerous_and_criminal_content", False) or \
@@ -94,7 +51,7 @@ def moderate_text(query: str) -> str:
         return "OutOfScope"
     return query
 
-#
 def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     if os.path.exists(store_dir):
         print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
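# --- Minimal sketch of the build-or-load pattern used above; the rest of the
# --- function body is collapsed in this view. The embedding model and the way
# --- CSV rows become documents are assumptions, not taken from this repo.
import os
import pandas as pd
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def build_or_load_vectorstore_sketch(csv_path: str, store_dir: str) -> FAISS:
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    if os.path.exists(store_dir):
        # newer LangChain releases may also require allow_dangerous_deserialization=True
        return FAISS.load_local(store_dir, embeddings)
    df = pd.read_csv(csv_path)
    docs = [Document(page_content=str(row)) for row in df.to_dict(orient="records")]
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(store_dir)
    return vectorstore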
@@ -123,7 +80,7 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     vectorstore.save_local(store_dir)
     return vectorstore
 
-#
 def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     class GeminiLangChainLLM(LLM):
         def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
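# --- The GeminiLangChainLLM body is cut off at the hunk boundary above. A minimal
# --- sketch of the wrapper pattern it follows: a LangChain LLM subclass needs
# --- _call and _llm_type, and the chain is assembled with RetrievalQA.from_chain_type.
# --- How the prompt is forwarded to the LiteLLMModel is an assumption here.
from typing import Optional
from langchain.llms.base import LLM
from langchain.chains import RetrievalQA

class GeminiLangChainLLMSketch(LLM):
    def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:
        return str(gemini_llm(prompt))  # assumed: the wrapped model accepts a raw prompt string

    @property
    def _llm_type(self) -> str:
        return "gemini_via_litellm"

def build_rag_chain_sketch(vectorstore: FAISS) -> RetrievalQA:
    return RetrievalQA.from_chain_type(
        llm=GeminiLangChainLLMSketch(),
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    )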
@@ -144,13 +101,13 @@ def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     )
     return rag_chain
 
-#
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()  # Refusal chain will now use dynamic topic
 tailor_chain = get_tailor_chain()
 cleaner_chain = get_cleaner_chain()
 
-#
 wellness_csv = "AIChatbot.csv"
 brand_csv = "BrandAI.csv"
 wellness_store_dir = "faiss_wellness_store"
@@ -163,7 +120,7 @@ gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get(
 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
 
-#
 search_tool = DuckDuckGoSearchTool()
 web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
 managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")
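# --- Sketch of how the manager agent used below (manager_agent.run) is typically
# --- wired up in smolagents releases that still ship ManagedAgent; the actual
# --- construction in pipeline.py is collapsed in this view, so treat the
# --- managed_agents wiring and the prompt wording as assumptions.
manager_agent_sketch = CodeAgent(
    tools=[],
    model=gemini_llm,
    managed_agents=[managed_web_agent],  # delegate searches to the managed web agent
)

def do_web_search_sketch(query: str) -> str:
    return manager_agent_sketch.run(f"Search the web and summarize: {query}")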
@@ -175,14 +132,10 @@ def do_web_search(query: str) -> str:
     response = manager_agent.run(search_query)
     return response
 
-#
 def run_with_chain(query: str) -> str:
     print("DEBUG: Starting run_with_chain...")
 
-
-    # Ensure the query is a string
-    query = str(query).strip()
-
     # 1) Moderate the query for harmful content
     moderated_query = moderate_text(query)
     if moderated_query == "OutOfScope":
 import os
 import getpass
+from pydantic_ai import Agent  # Import the Agent from pydantic_ai
+from pydantic_ai.models.mistral import MistralModel  # Import the Mistral model
 import spacy  # Import spaCy for NER functionality
 import pandas as pd
 from typing import Optional

 from smolagents import CodeAgent, DuckDuckGoSearchTool, ManagedAgent, LiteLLMModel
 import subprocess  # Import subprocess to run shell commands
 from langchain.llms.base import LLM  # Import LLM
+
+# Initialize Mistral agent using Pydantic AI
+mistral_api_key = os.environ.get("MISTRAL_API_KEY")  # Ensure your Mistral API key is set
+mistral_model = MistralModel("mistral-large-latest", api_key=mistral_api_key)  # Use a Mistral model
+mistral_agent = Agent(mistral_model)
+
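# --- Illustrative only (not part of this commit): how a pydantic_ai Agent is
# --- normally invoked. run_sync() is the synchronous entry point; the attribute
# --- carrying the model output (.data vs .output) varies across pydantic_ai
# --- versions, so this is an assumption about the installed release.
result = mistral_agent.run_sync("Reply with the single word OK.")
print(result.data)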
+# Load spaCy model for NER and download the spaCy model if not already installed
 def install_spacy_model():
     try:
         spacy.load("en_core_web_sm")
         print("spaCy model 'en_core_web_sm' is already installed.")
     except OSError:
         print("Downloading spaCy model 'en_core_web_sm'...")
         subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
         print("spaCy model 'en_core_web_sm' downloaded successfully.")

 # Load the spaCy model globally
 nlp = spacy.load("en_core_web_sm")
 
+# Function to moderate text using Pydantic AI's Mistral moderation model
 def moderate_text(query: str) -> str:
     """
+    Classifies the query as harmful or not using Mistral Moderation via Pydantic AI.
     Returns "OutOfScope" if harmful, otherwise returns the original query.
     """
+    response = mistral_agent.call("classify", {"inputs": [query]})
+    categories = response['results'][0]['categories']
+
+    # Check if harmful content is flagged in moderation categories
     if categories.get("violence_and_threats", False) or \
        categories.get("hate_and_discrimination", False) or \
        categories.get("dangerous_and_criminal_content", False) or \
 
         return "OutOfScope"
     return query
 
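# --- For reference only (not part of this commit): Mistral also exposes a
# --- dedicated moderation endpoint through the official mistralai client, which
# --- returns the results[0].categories structure the checks above expect. The
# --- Agent.call(...) usage above is assumed to wrap something equivalent.
from mistralai import Mistral

def moderate_with_mistral_client(query: str):
    client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
    resp = client.classifiers.moderate(model="mistral-moderation-latest", inputs=[query])
    return resp.results[0].categories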
+# 3) build_or_load_vectorstore (no changes)
 def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
     if os.path.exists(store_dir):
         print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")

     vectorstore.save_local(store_dir)
     return vectorstore
 
+# 4) Build RAG chain for Gemini (no changes)
 def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     class GeminiLangChainLLM(LLM):
         def _call(self, prompt: str, stop: Optional[list] = None, **kwargs) -> str:

     )
     return rag_chain
 
+# 5) Initialize all the separate chains
 classification_chain = get_classification_chain()
 refusal_chain = get_refusal_chain()  # Refusal chain will now use dynamic topic
 tailor_chain = get_tailor_chain()
 cleaner_chain = get_cleaner_chain()
 
+# 6) Build our vectorstores + RAG chains
 wellness_csv = "AIChatbot.csv"
 brand_csv = "BrandAI.csv"
 wellness_store_dir = "faiss_wellness_store"

 wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
 brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
 
+# 7) Tools / Agents for web search (no changes)
 search_tool = DuckDuckGoSearchTool()
 web_agent = CodeAgent(tools=[search_tool], model=gemini_llm)
 managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Runs web search for you.")

     response = manager_agent.run(search_query)
     return response
 
+# 8) Orchestrator: run_with_chain
 def run_with_chain(query: str) -> str:
     print("DEBUG: Starting run_with_chain...")
 
     # 1) Moderate the query for harmful content
     moderated_query = moderate_text(query)
     if moderated_query == "OutOfScope":
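# --- Example invocation of the orchestrator above (the rest of run_with_chain is
# --- collapsed in this view; the query string is made up for illustration):
if __name__ == "__main__":
    print(run_with_chain("What are some daily habits that improve sleep quality?"))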