ChatBotAgenticRAG_dup

Sleeping

App Files Community

Phoenix21 committed on 21 days ago

Commit

19fdb92

verified ·

1 Parent(s): c947e4c

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +8 -85

pipeline.py CHANGED Viewed

@@ -40,28 +40,11 @@ class QueryInput(BaseModel):
             raise ValueError("Query cannot be empty or just whitespace")
         return v.strip()
-class ClassificationResult(BaseModel):
-    category: str = Field(..., description="The classification category")
-    confidence: float = Field(..., ge=0.0, le=1.0, description="Classification confidence score")
 class ModerationResult(BaseModel):
     is_safe: bool = Field(..., description="Whether the content is safe")
     categories: Dict[str, bool] = Field(default_factory=dict, description="Detected content categories")
     original_text: str = Field(..., description="The original input text")
-class RAGResponse(BaseModel):
-    answer: str = Field(..., description="The generated answer")
-    sources: List[str] = Field(default_factory=list, description="Source documents used")
-    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score of the answer")
-def sanitize_message(message: Any) -> str:
-    """Sanitize message input to ensure it's a valid string."""
-    if hasattr(message, 'content'):
-        return str(message.content)
-    if isinstance(message, (list, dict)):
-        return str(message)
-    return str(message)
 # Load spaCy model for NER
 def install_spacy_model():
     try:
@@ -128,27 +111,18 @@ def moderate_text(query: str) -> ModerationResult:
     except Exception as e:
         raise RuntimeError(f"Moderation failed: {str(e)}")
-def classify_query(query: str) -> ClassificationResult:
     try:
         query_input = QueryInput(query=query)
         wellness_keywords = ["box breathing", "meditation", "yoga", "mindfulness", "breathing exercises"]
         if any(keyword in query_input.query.lower() for keyword in wellness_keywords):
-            return ClassificationResult(category="Wellness", confidence=0.9)
         class_result = classification_chain.invoke({"query": query_input.query})
         classification = class_result.get("text", "").strip()
-        confidence_map = {
-            "Wellness": 0.8,
-            "Brand": 0.8,
-            "OutOfScope": 0.6
-        }
-        return ClassificationResult(
-            category=classification if classification != "" else "OutOfScope",
-            confidence=confidence_map.get(classification, 0.5)
-        )
     except ValidationError as e:
         raise ValueError(f"Classification input validation failed: {str(e)}")
     except Exception as e:
@@ -166,14 +140,6 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
-        if "Answer" in df.columns:
-            df.rename(columns={"Answer": "Answers"}, inplace=True)
-        if "Question" not in df.columns and "Question " in df.columns:
-            df.rename(columns={"Question ": "Question"}, inplace=True)
-        if "Question" not in df.columns or "Answers" not in df.columns:
-            raise ValueError("CSV must have 'Question' and 'Answers' columns")
         docs = [
             Document(page_content=str(row["Answers"]), metadata={"question": str(row["Question"])})
             for _, row in df.iterrows()
@@ -209,98 +175,55 @@ def build_rag_chain(llm_model: LiteLLMModel, vectorstore: FAISS) -> RetrievalQA:
     except Exception as e:
         raise RuntimeError(f"Error building RAG chain: {str(e)}")
-def do_web_search(query: str) -> str:
-    try:
-        query_input = QueryInput(query=query)
-        search_tool = DuckDuckGoSearchTool()
-        web_agent = CodeAgent(tools=[search_tool], model=pydantic_agent)
-        managed_web_agent = ManagedAgent(agent=web_agent, name="web_search", description="Performs web searches")
-        manager_agent = CodeAgent(tools=[], model=pydantic_agent, managed_agents=[managed_web_agent])
-        search_query = f"Give me relevant info: {query_input.query}"
-        return manager_agent.run(search_query)
-    except Exception as e:
-        return f"Web search failed: {str(e)}"
-def merge_responses(kb_answer: str, web_answer: str) -> str:
-    try:
-        if not kb_answer and not web_answer:
-            return "No relevant information found."
-        if not web_answer:
-            return kb_answer.strip()
-        if not kb_answer:
-            return web_answer.strip()
-        return f"Knowledge Base Answer: {kb_answer.strip()}\n\nWeb Search Result: {web_answer.strip()}"
-    except Exception as e:
-        return f"Error merging responses: {str(e)}"
 def run_pipeline(query: str) -> str:
     try:
-        # Sanitize and validate input
         query = sanitize_message(query)
-        # Moderate content
         moderation_result = moderate_text(query)
         if not moderation_result.is_safe:
             return "Sorry, this query contains harmful or inappropriate content."
-        # Classify the query
-        classification_result = classify_query(moderation_result.original_text)
-        # Handle different classifications
-        if classification_result.category == "OutOfScope":
             refusal_text = refusal_chain.run({"topic": "this topic"})
             return tailor_chain.run({"response": refusal_text}).strip()
-        if classification_result.category == "Wellness":
             rag_result = wellness_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
             web_answer = "" if csv_answer else do_web_search(moderation_result.original_text)
             final_merged = merge_responses(csv_answer, web_answer)
             return tailor_chain.run({"response": final_merged}).strip()
-        if classification_result.category == "Brand":
             rag_result = brand_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
             final_merged = merge_responses(csv_answer, "")
             return tailor_chain.run({"response": final_merged}).strip()
-        # Default fallback
         refusal_text = refusal_chain.run({"topic": "this topic"})
         return tailor_chain.run({"response": refusal_text}).strip()
-    except Exception as e:
-        return f"An error occurred while processing your request: {str(e)}"
 # Initialize chains and vectorstores
 try:
-    # Initialize chain components
     classification_chain = get_classification_chain()
     refusal_chain = get_refusal_chain()
     tailor_chain = get_tailor_chain()
     cleaner_chain = get_cleaner_chain()
-    # Set up paths
     wellness_csv = "AIChatbot.csv"
     brand_csv = "BrandAI.csv"
     wellness_store_dir = "faiss_wellness_store"
     brand_store_dir = "faiss_brand_store"
-    # Build or load vectorstores
     wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
     brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
-    # Initialize LLM and RAG chains
     gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
     wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
     brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
     print("Pipeline initialized successfully!")
 except Exception as e:
     print(f"Error initializing pipeline: {str(e)}")
-def run_with_chain(query: str) -> str:
-    return run_pipeline(query)

             raise ValueError("Query cannot be empty or just whitespace")
         return v.strip()
 class ModerationResult(BaseModel):
     is_safe: bool = Field(..., description="Whether the content is safe")
     categories: Dict[str, bool] = Field(default_factory=dict, description="Detected content categories")
     original_text: str = Field(..., description="The original input text")
 # Load spaCy model for NER
 def install_spacy_model():
     try:
     except Exception as e:
         raise RuntimeError(f"Moderation failed: {str(e)}")
+def classify_query(query: str) -> str:
     try:
         query_input = QueryInput(query=query)
         wellness_keywords = ["box breathing", "meditation", "yoga", "mindfulness", "breathing exercises"]
         if any(keyword in query_input.query.lower() for keyword in wellness_keywords):
+            return "Wellness"
         class_result = classification_chain.invoke({"query": query_input.query})
         classification = class_result.get("text", "").strip()
+        return classification if classification != "" else "OutOfScope"
     except ValidationError as e:
         raise ValueError(f"Classification input validation failed: {str(e)}")
     except Exception as e:
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
         docs = [
             Document(page_content=str(row["Answers"]), metadata={"question": str(row["Question"])})
             for _, row in df.iterrows()
     except Exception as e:
         raise RuntimeError(f"Error building RAG chain: {str(e)}")
 def run_pipeline(query: str) -> str:
     try:
         query = sanitize_message(query)
         moderation_result = moderate_text(query)
         if not moderation_result.is_safe:
             return "Sorry, this query contains harmful or inappropriate content."
+        classification = classify_query(moderation_result.original_text)
+        if classification == "OutOfScope":
             refusal_text = refusal_chain.run({"topic": "this topic"})
             return tailor_chain.run({"response": refusal_text}).strip()
+        if classification == "Wellness":
             rag_result = wellness_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
             web_answer = "" if csv_answer else do_web_search(moderation_result.original_text)
             final_merged = merge_responses(csv_answer, web_answer)
             return tailor_chain.run({"response": final_merged}).strip()
+        if classification == "Brand":
             rag_result = brand_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
             final_merged = merge_responses(csv_answer, "")
             return tailor_chain.run({"response": final_merged}).strip()
         refusal_text = refusal_chain.run({"topic": "this topic"})
         return tailor_chain.run({"response": refusal_text}).strip()
 # Initialize chains and vectorstores
 try:
     classification_chain = get_classification_chain()
     refusal_chain = get_refusal_chain()
     tailor_chain = get_tailor_chain()
     cleaner_chain = get_cleaner_chain()
     wellness_csv = "AIChatbot.csv"
     brand_csv = "BrandAI.csv"
     wellness_store_dir = "faiss_wellness_store"
     brand_store_dir = "faiss_brand_store"
     wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
     brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
     gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
     wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
     brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)
     print("Pipeline initialized successfully!")
 except Exception as e:
     print(f"Error initializing pipeline: {str(e)}")