ChatBotAgenticRAG_dup

Sleeping

App Files Community

Phoenix21 committed 21 days ago

Commit

c947e4c

verified ·

1 Parent(s): a684f83

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +18 -18

pipeline.py CHANGED Viewed

@@ -25,6 +25,9 @@ from prompts import classification_prompt, refusal_prompt, tailor_prompt
 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
 client = Mistral(api_key=mistral_api_key)
 # Pydantic models for validation and type safety
 class QueryInput(BaseModel):
     query: str = Field(..., min_length=1, description="The input query string")
@@ -51,6 +54,14 @@ class RAGResponse(BaseModel):
     sources: List[str] = Field(default_factory=list, description="Source documents used")
     confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score of the answer")
 # Load spaCy model for NER
 def install_spacy_model():
     try:
@@ -70,13 +81,11 @@ def extract_main_topic(query: str) -> str:
         doc = nlp(query_input.query)
         main_topic = None
-        # Try to find named entities first
         for ent in doc.ents:
             if ent.label_ in ["ORG", "PRODUCT", "PERSON", "GPE", "TIME"]:
                 main_topic = ent.text
                 break
-        # If no named entities found, look for nouns
         if not main_topic:
             for token in doc:
                 if token.pos_ in ["NOUN", "PROPN"]:
@@ -157,7 +166,6 @@ def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
-        # Handle column name variations
         if "Answer" in df.columns:
             df.rename(columns={"Answer": "Answers"}, inplace=True)
         if "Question" not in df.columns and "Question " in df.columns:
@@ -228,25 +236,13 @@ def merge_responses(kb_answer: str, web_answer: str) -> str:
         return f"Knowledge Base Answer: {kb_answer.strip()}\n\nWeb Search Result: {web_answer.strip()}"
     except Exception as e:
         return f"Error merging responses: {str(e)}"
-def sanitize_message(message: Any) -> str:
-    """Sanitize message input to ensure it's a valid string."""
-    if hasattr(message, 'content'):
-        return str(message.content)
-    if isinstance(message, (list, dict)):
-        return str(message)
-    return str(message)
-# Modify your run_pipeline function to include the sanitization
 def run_pipeline(query: str) -> str:
     try:
-        # Sanitize input
         query = sanitize_message(query)
-        # Rest of your pipeline code...
-        moderation_result = moderate_text(query)
-        if not moderation_result.is_safe:
-            return "Sorry, this query contains harmful or inappropriate content."
-        # Validate and moderate input
         moderation_result = moderate_text(query)
         if not moderation_result.is_safe:
             return "Sorry, this query contains harmful or inappropriate content."
@@ -254,11 +250,11 @@ def run_pipeline(query: str) -> str:
         # Classify the query
         classification_result = classify_query(moderation_result.original_text)
         if classification_result.category == "OutOfScope":
             refusal_text = refusal_chain.run({"topic": "this topic"})
             return tailor_chain.run({"response": refusal_text}).strip()
-        # Handle different classifications
         if classification_result.category == "Wellness":
             rag_result = wellness_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
@@ -281,19 +277,23 @@ def run_pipeline(query: str) -> str:
 # Initialize chains and vectorstores
 try:
     classification_chain = get_classification_chain()
     refusal_chain = get_refusal_chain()
     tailor_chain = get_tailor_chain()
     cleaner_chain = get_cleaner_chain()
     wellness_csv = "AIChatbot.csv"
     brand_csv = "BrandAI.csv"
     wellness_store_dir = "faiss_wellness_store"
     brand_store_dir = "faiss_brand_store"
     wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
     brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
     gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
     wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
     brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)

 mistral_api_key = os.environ.get("MISTRAL_API_KEY")
 client = Mistral(api_key=mistral_api_key)
+# Initialize LiteLLM model for web search
+pydantic_agent = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
 # Pydantic models for validation and type safety
 class QueryInput(BaseModel):
     query: str = Field(..., min_length=1, description="The input query string")
     sources: List[str] = Field(default_factory=list, description="Source documents used")
     confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score of the answer")
+def sanitize_message(message: Any) -> str:
+    """Sanitize message input to ensure it's a valid string."""
+    if hasattr(message, 'content'):
+        return str(message.content)
+    if isinstance(message, (list, dict)):
+        return str(message)
+    return str(message)
 # Load spaCy model for NER
 def install_spacy_model():
     try:
         doc = nlp(query_input.query)
         main_topic = None
         for ent in doc.ents:
             if ent.label_ in ["ORG", "PRODUCT", "PERSON", "GPE", "TIME"]:
                 main_topic = ent.text
                 break
         if not main_topic:
             for token in doc:
                 if token.pos_ in ["NOUN", "PROPN"]:
         df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
         df.columns = df.columns.str.strip()
         if "Answer" in df.columns:
             df.rename(columns={"Answer": "Answers"}, inplace=True)
         if "Question" not in df.columns and "Question " in df.columns:
         return f"Knowledge Base Answer: {kb_answer.strip()}\n\nWeb Search Result: {web_answer.strip()}"
     except Exception as e:
         return f"Error merging responses: {str(e)}"
 def run_pipeline(query: str) -> str:
     try:
+        # Sanitize and validate input
         query = sanitize_message(query)
+        # Moderate content
         moderation_result = moderate_text(query)
         if not moderation_result.is_safe:
             return "Sorry, this query contains harmful or inappropriate content."
         # Classify the query
         classification_result = classify_query(moderation_result.original_text)
+        # Handle different classifications
         if classification_result.category == "OutOfScope":
             refusal_text = refusal_chain.run({"topic": "this topic"})
             return tailor_chain.run({"response": refusal_text}).strip()
         if classification_result.category == "Wellness":
             rag_result = wellness_rag_chain({"query": moderation_result.original_text})
             csv_answer = rag_result["result"].strip()
 # Initialize chains and vectorstores
 try:
+    # Initialize chain components
     classification_chain = get_classification_chain()
     refusal_chain = get_refusal_chain()
     tailor_chain = get_tailor_chain()
     cleaner_chain = get_cleaner_chain()
+    # Set up paths
     wellness_csv = "AIChatbot.csv"
     brand_csv = "BrandAI.csv"
     wellness_store_dir = "faiss_wellness_store"
     brand_store_dir = "faiss_brand_store"
+    # Build or load vectorstores
     wellness_vectorstore = build_or_load_vectorstore(wellness_csv, wellness_store_dir)
     brand_vectorstore = build_or_load_vectorstore(brand_csv, brand_store_dir)
+    # Initialize LLM and RAG chains
     gemini_llm = LiteLLMModel(model_id="gemini/gemini-pro", api_key=os.environ.get("GEMINI_API_KEY"))
     wellness_rag_chain = build_rag_chain(gemini_llm, wellness_vectorstore)
     brand_rag_chain = build_rag_chain(gemini_llm, brand_vectorstore)