anderson-ufrj committed on
Commit 796f99b · 1 Parent(s): f93caf8

feat(ml): implement comprehensive ML Pipeline with versioning and A/B testing


- Add MLTrainingPipeline with support for multiple algorithms
- Implement model versioning with MLflow tracking
- Create A/B testing framework with multiple allocation strategies
- Add API endpoints for model training and management
- Support Thompson sampling and epsilon-greedy strategies
- Include statistical significance testing for A/B tests
- Add comprehensive unit tests for ML components
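The new routes register under the `/api/v1/ml` prefix (see `src/api/routes/ml_pipeline.py` below). As a quick orientation before the diff, here is a minimal client-side sketch of the training-and-promotion flow; the base URL, bearer token, and model identifier are placeholders, and only fields defined in `TrainModelRequest` and `PromoteModelRequest` are used.

# Minimal sketch (assumptions: local deployment URL, valid bearer token, httpx installed).
import httpx

BASE = "http://localhost:8000/api/v1/ml"            # placeholder deployment URL
HEADERS = {"Authorization": "Bearer <token>"}       # placeholder credential

with httpx.Client(base_url=BASE, headers=HEADERS, timeout=60.0) as client:
    # Start training an anomaly model (fields from TrainModelRequest).
    client.post("/train", json={
        "model_type": "anomaly",
        "algorithm": "isolation_forest",
        "hyperparameters": {"n_estimators": 100},
    })

    # Inspect the registry, then promote a specific version (fields from PromoteModelRequest).
    print(client.get("/models").json())
    client.post("/models/promote", json={
        "model_id": "<model-id-from-registry>",     # placeholder; taken from the /models response
        "version": 1,
        "status": "production",
    })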

ROADMAP_MELHORIAS_2025.md CHANGED
@@ -251,10 +251,6 @@ Este documento apresenta um roadmap estruturado para melhorias no backend do Cid
   - [ ] Comentários e anotações
   - [ ] Workspaces compartilhados
 
- 2. **Mobile & PWA**
- - [ ] Progressive Web App
- - [ ] Offline capabilities
- - [ ] Push notifications
 
  **Entregáveis**: Platform enterprise-ready
 
@@ -325,4 +321,4 @@ Este documento apresenta um roadmap estruturado para melhorias no backend do Cid
 
  ---
 
- *Este roadmap é um documento vivo e deve ser revisado a cada sprint com base no feedback e aprendizados.*
+ *Este roadmap é um documento vivo e deve ser revisado a cada sprint com base no feedback e aprendizados.*
pyproject.toml CHANGED
@@ -61,6 +61,8 @@ dependencies = [
   "hdbscan>=0.8.33",
   "shap>=0.43.0",
   "lime>=0.2.0.1",
+ "mlflow>=2.9.0",
+ "joblib>=1.3.2",
 
   # Async processing
   "celery[redis]>=5.3.4",
src/api/app.py CHANGED
@@ -521,6 +521,12 @@ app.include_router(
   tags=["Geographic Data"]
 )
 
+ from src.api.routes import ml_pipeline
+ app.include_router(
+     ml_pipeline.router,
+     tags=["ML Pipeline"]
+ )
+
 
 # Global exception handler
 @app.exception_handler(CidadaoAIError)
src/api/routes/ml_pipeline.py ADDED
@@ -0,0 +1,451 @@
1
+ """
2
+ ML Pipeline API Routes
3
+
4
+ This module provides API endpoints for training, versioning, and
5
+ A/B testing ML models.
6
+ """
7
+
8
+ from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
9
+ from typing import Dict, Any, List, Optional
10
+ from pydantic import BaseModel, Field
11
+ import numpy as np
12
+
13
+ from src.api.dependencies import get_current_user
14
+ from src.ml.training_pipeline import get_training_pipeline
15
+ from src.ml.ab_testing import get_ab_testing, TrafficAllocationStrategy
16
+ from src.core import get_logger
17
+
18
+
19
+ logger = get_logger(__name__)
20
+ router = APIRouter(prefix="/api/v1/ml")
21
+
22
+
23
+ class TrainModelRequest(BaseModel):
24
+ """Request model for training ML models."""
25
+ model_type: str = Field(..., description="Type of model (anomaly, fraud, pattern)")
26
+ algorithm: str = Field(..., description="Algorithm to use (isolation_forest, etc)")
27
+ dataset_id: Optional[str] = Field(None, description="Dataset identifier")
28
+ hyperparameters: Optional[Dict[str, Any]] = Field(default_factory=dict)
29
+ metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
30
+
31
+
32
+ class PromoteModelRequest(BaseModel):
33
+ """Request model for promoting models."""
34
+ model_id: str = Field(..., description="Model identifier")
35
+ version: int = Field(..., description="Model version")
36
+ status: str = Field("production", description="Target status")
37
+
38
+
39
+ class ABTestRequest(BaseModel):
40
+ """Request model for creating A/B tests."""
41
+ test_name: str = Field(..., description="Unique test name")
42
+ model_a_id: str = Field(..., description="Model A identifier")
43
+ model_a_version: Optional[int] = Field(None, description="Model A version")
44
+ model_b_id: str = Field(..., description="Model B identifier")
45
+ model_b_version: Optional[int] = Field(None, description="Model B version")
46
+ allocation_strategy: str = Field("random", description="Allocation strategy")
47
+ traffic_split: List[float] = Field([0.5, 0.5], description="Traffic split")
48
+ success_metric: str = Field("f1_score", description="Success metric")
49
+ minimum_sample_size: int = Field(1000, description="Minimum samples")
50
+ significance_level: float = Field(0.05, description="Significance level")
51
+ auto_stop: bool = Field(True, description="Auto stop on winner")
52
+ duration_hours: Optional[int] = Field(None, description="Max duration")
53
+
54
+
55
+ class RecordPredictionRequest(BaseModel):
56
+ """Request model for recording predictions in A/B test."""
57
+ model_selection: str = Field(..., description="model_a or model_b")
58
+ success: bool = Field(..., description="Prediction success")
59
+ metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
60
+
61
+
62
+ @router.post("/train", response_model=Dict[str, Any])
63
+ async def train_model(
64
+ request: TrainModelRequest,
65
+ background_tasks: BackgroundTasks,
66
+ current_user: Dict = Depends(get_current_user)
67
+ ):
68
+ """
69
+ Train a new ML model.
70
+
71
+ This endpoint initiates model training with the specified algorithm
72
+ and parameters. Training runs asynchronously in the background.
73
+ """
74
+ try:
75
+ pipeline = await get_training_pipeline()
76
+
77
+ # For demo purposes, generate synthetic training data
78
+ # In production, this would load from dataset_id
79
+ if request.model_type == "anomaly":
80
+ # Generate anomaly detection data
81
+ n_samples = 1000
82
+ n_features = 10
83
+ X_train = np.random.randn(n_samples, n_features)
84
+ # Add some anomalies
85
+ anomalies = np.random.randn(50, n_features) * 3
86
+ X_train = np.vstack([X_train, anomalies])
87
+ y_train = None # Unsupervised
88
+ elif request.model_type == "fraud":
89
+ # Generate fraud detection data
90
+ n_samples = 1000
91
+ n_features = 15
92
+ X_train = np.random.randn(n_samples, n_features)
93
+ y_train = np.random.choice([0, 1], size=n_samples, p=[0.95, 0.05])
94
+ else:
95
+ # Pattern recognition data
96
+ n_samples = 800
97
+ n_features = 20
98
+ X_train = np.random.randn(n_samples, n_features)
99
+ y_train = np.random.choice([0, 1, 2], size=n_samples)
100
+
101
+ # Start training
102
+ result = await pipeline.train_model(
103
+ model_type=request.model_type,
104
+ algorithm=request.algorithm,
105
+ X_train=X_train,
106
+ y_train=y_train,
107
+ hyperparameters=request.hyperparameters,
108
+ metadata={
109
+ **request.metadata,
110
+ "user_id": current_user["id"],
111
+ "dataset_id": request.dataset_id
112
+ }
113
+ )
114
+
115
+ return result
116
+
117
+ except Exception as e:
118
+ logger.error(f"Training failed: {str(e)}")
119
+ raise HTTPException(status_code=500, detail=str(e))
120
+
121
+
122
+ @router.get("/models", response_model=List[Dict[str, Any]])
123
+ async def list_models(
124
+ model_type: Optional[str] = None,
125
+ current_user: Dict = Depends(get_current_user)
126
+ ):
127
+ """List all available models with their versions."""
128
+ try:
129
+ pipeline = await get_training_pipeline()
130
+
131
+ # Get models from registry
132
+ models = []
133
+ for model_id, registry in pipeline.model_registry.items():
134
+ if model_type and not model_id.startswith(model_type):
135
+ continue
136
+
137
+ models.append({
138
+ "model_id": model_id,
139
+ "versions": len(registry["versions"]),
140
+ "latest_version": max(
141
+ (v["version"] for v in registry["versions"]),
142
+ default=0
143
+ ),
144
+ "created_at": registry["created_at"],
145
+ "production_version": next(
146
+ (v["version"] for v in registry["versions"]
147
+ if v.get("status") == "production"),
148
+ None
149
+ )
150
+ })
151
+
152
+ return models
153
+
154
+ except Exception as e:
155
+ logger.error(f"Failed to list models: {str(e)}")
156
+ raise HTTPException(status_code=500, detail=str(e))
157
+
158
+
159
+ @router.get("/models/{model_id}/versions", response_model=List[Dict[str, Any]])
160
+ async def list_model_versions(
161
+ model_id: str,
162
+ current_user: Dict = Depends(get_current_user)
163
+ ):
164
+ """List all versions of a specific model."""
165
+ try:
166
+ pipeline = await get_training_pipeline()
167
+
168
+ if model_id not in pipeline.model_registry:
169
+ raise HTTPException(status_code=404, detail="Model not found")
170
+
171
+ versions = []
172
+ for version in pipeline.model_registry[model_id]["versions"]:
173
+ versions.append({
174
+ "version": version["version"],
175
+ "status": version["status"],
176
+ "metrics": version["metrics"],
177
+ "created_at": version["created_at"],
178
+ "promoted_at": version.get("promoted_at")
179
+ })
180
+
181
+ return versions
182
+
183
+ except HTTPException:
184
+ raise
185
+ except Exception as e:
186
+ logger.error(f"Failed to list versions: {str(e)}")
187
+ raise HTTPException(status_code=500, detail=str(e))
188
+
189
+
190
+ @router.get("/models/{model_id}/metrics", response_model=Dict[str, Any])
191
+ async def get_model_metrics(
192
+ model_id: str,
193
+ version: Optional[int] = None,
194
+ current_user: Dict = Depends(get_current_user)
195
+ ):
196
+ """Get metrics for a specific model version."""
197
+ try:
198
+ pipeline = await get_training_pipeline()
199
+ metrics = await pipeline.get_model_metrics(model_id, version)
200
+
201
+ return {
202
+ "model_id": model_id,
203
+ "version": version or "latest",
204
+ "metrics": metrics
205
+ }
206
+
207
+ except Exception as e:
208
+ logger.error(f"Failed to get metrics: {str(e)}")
209
+ raise HTTPException(status_code=500, detail=str(e))
210
+
211
+
212
+ @router.post("/models/promote", response_model=Dict[str, Any])
213
+ async def promote_model(
214
+ request: PromoteModelRequest,
215
+ current_user: Dict = Depends(get_current_user)
216
+ ):
217
+ """Promote a model version to production."""
218
+ try:
219
+ pipeline = await get_training_pipeline()
220
+ success = await pipeline.promote_model(
221
+ request.model_id,
222
+ request.version,
223
+ request.status
224
+ )
225
+
226
+ if not success:
227
+ raise HTTPException(status_code=500, detail="Promotion failed")
228
+
229
+ return {
230
+ "success": True,
231
+ "model_id": request.model_id,
232
+ "version": request.version,
233
+ "status": request.status,
234
+ "message": f"Model promoted to {request.status}"
235
+ }
236
+
237
+ except HTTPException:
238
+ raise
239
+ except Exception as e:
240
+ logger.error(f"Failed to promote model: {str(e)}")
241
+ raise HTTPException(status_code=500, detail=str(e))
242
+
243
+
244
+ @router.post("/ab-test/create", response_model=Dict[str, Any])
245
+ async def create_ab_test(
246
+ request: ABTestRequest,
247
+ current_user: Dict = Depends(get_current_user)
248
+ ):
249
+ """Create a new A/B test."""
250
+ try:
251
+ ab_framework = await get_ab_testing()
252
+
253
+ # Validate allocation strategy
254
+ try:
255
+ strategy = TrafficAllocationStrategy(request.allocation_strategy)
256
+ except ValueError:
257
+ raise HTTPException(
258
+ status_code=400,
259
+ detail=f"Invalid allocation strategy: {request.allocation_strategy}"
260
+ )
261
+
262
+ test_config = await ab_framework.create_test(
263
+ test_name=request.test_name,
264
+ model_a=(request.model_a_id, request.model_a_version),
265
+ model_b=(request.model_b_id, request.model_b_version),
266
+ allocation_strategy=strategy,
267
+ traffic_split=tuple(request.traffic_split),
268
+ success_metric=request.success_metric,
269
+ minimum_sample_size=request.minimum_sample_size,
270
+ significance_level=request.significance_level,
271
+ auto_stop=request.auto_stop,
272
+ duration_hours=request.duration_hours
273
+ )
274
+
275
+ return test_config
276
+
277
+ except ValueError as e:
278
+ raise HTTPException(status_code=400, detail=str(e))
279
+ except Exception as e:
280
+ logger.error(f"Failed to create A/B test: {str(e)}")
281
+ raise HTTPException(status_code=500, detail=str(e))
282
+
283
+
284
+ @router.post("/ab-test/{test_name}/start", response_model=Dict[str, Any])
285
+ async def start_ab_test(
286
+ test_name: str,
287
+ current_user: Dict = Depends(get_current_user)
288
+ ):
289
+ """Start an A/B test."""
290
+ try:
291
+ ab_framework = await get_ab_testing()
292
+ success = await ab_framework.start_test(test_name)
293
+
294
+ if not success:
295
+ raise HTTPException(status_code=500, detail="Failed to start test")
296
+
297
+ return {
298
+ "success": True,
299
+ "test_name": test_name,
300
+ "message": "A/B test started"
301
+ }
302
+
303
+ except ValueError as e:
304
+ raise HTTPException(status_code=400, detail=str(e))
305
+ except Exception as e:
306
+ logger.error(f"Failed to start A/B test: {str(e)}")
307
+ raise HTTPException(status_code=500, detail=str(e))
308
+
309
+
310
+ @router.get("/ab-test/{test_name}/allocate", response_model=Dict[str, Any])
311
+ async def allocate_model_for_test(
312
+ test_name: str,
313
+ user_id: Optional[str] = None
314
+ ):
315
+ """Get model allocation for a user in an A/B test."""
316
+ try:
317
+ ab_framework = await get_ab_testing()
318
+ model_id, version = await ab_framework.allocate_model(test_name, user_id)
319
+
320
+ return {
321
+ "model_id": model_id,
322
+ "version": version,
323
+ "test_name": test_name,
324
+ "user_id": user_id
325
+ }
326
+
327
+ except ValueError as e:
328
+ raise HTTPException(status_code=400, detail=str(e))
329
+ except Exception as e:
330
+ logger.error(f"Failed to allocate model: {str(e)}")
331
+ raise HTTPException(status_code=500, detail=str(e))
332
+
333
+
334
+ @router.post("/ab-test/{test_name}/record", response_model=Dict[str, Any])
335
+ async def record_prediction(
336
+ test_name: str,
337
+ request: RecordPredictionRequest
338
+ ):
339
+ """Record a prediction result for an A/B test."""
340
+ try:
341
+ ab_framework = await get_ab_testing()
342
+ await ab_framework.record_prediction(
343
+ test_name,
344
+ request.model_selection,
345
+ request.success,
346
+ request.metadata
347
+ )
348
+
349
+ return {
350
+ "success": True,
351
+ "test_name": test_name,
352
+ "model_selection": request.model_selection
353
+ }
354
+
355
+ except ValueError as e:
356
+ raise HTTPException(status_code=400, detail=str(e))
357
+ except Exception as e:
358
+ logger.error(f"Failed to record prediction: {str(e)}")
359
+ raise HTTPException(status_code=500, detail=str(e))
360
+
361
+
362
+ @router.get("/ab-test/{test_name}/status", response_model=Dict[str, Any])
363
+ async def get_ab_test_status(
364
+ test_name: str,
365
+ current_user: Dict = Depends(get_current_user)
366
+ ):
367
+ """Get current status and results of an A/B test."""
368
+ try:
369
+ ab_framework = await get_ab_testing()
370
+ status = await ab_framework.get_test_status(test_name)
371
+
372
+ # Include latest analysis if available
373
+ if "latest_analysis" in status:
374
+ status["analysis"] = status["latest_analysis"]
375
+
376
+ return status
377
+
378
+ except ValueError as e:
379
+ raise HTTPException(status_code=404, detail=str(e))
380
+ except Exception as e:
381
+ logger.error(f"Failed to get test status: {str(e)}")
382
+ raise HTTPException(status_code=500, detail=str(e))
383
+
384
+
385
+ @router.post("/ab-test/{test_name}/stop", response_model=Dict[str, Any])
386
+ async def stop_ab_test(
387
+ test_name: str,
388
+ reason: str = "Manual stop",
389
+ current_user: Dict = Depends(get_current_user)
390
+ ):
391
+ """Stop an A/B test."""
392
+ try:
393
+ ab_framework = await get_ab_testing()
394
+ success = await ab_framework.stop_test(test_name, reason)
395
+
396
+ if not success:
397
+ raise HTTPException(status_code=500, detail="Failed to stop test")
398
+
399
+ return {
400
+ "success": True,
401
+ "test_name": test_name,
402
+ "message": f"A/B test stopped: {reason}"
403
+ }
404
+
405
+ except ValueError as e:
406
+ raise HTTPException(status_code=400, detail=str(e))
407
+ except Exception as e:
408
+ logger.error(f"Failed to stop A/B test: {str(e)}")
409
+ raise HTTPException(status_code=500, detail=str(e))
410
+
411
+
412
+ @router.post("/ab-test/{test_name}/promote-winner", response_model=Dict[str, Any])
413
+ async def promote_ab_test_winner(
414
+ test_name: str,
415
+ current_user: Dict = Depends(get_current_user)
416
+ ):
417
+ """Promote the winning model from an A/B test to production."""
418
+ try:
419
+ ab_framework = await get_ab_testing()
420
+ success = await ab_framework.promote_winner(test_name)
421
+
422
+ if not success:
423
+ raise HTTPException(status_code=500, detail="Failed to promote winner")
424
+
425
+ return {
426
+ "success": True,
427
+ "test_name": test_name,
428
+ "message": "Winner promoted to production"
429
+ }
430
+
431
+ except ValueError as e:
432
+ raise HTTPException(status_code=400, detail=str(e))
433
+ except Exception as e:
434
+ logger.error(f"Failed to promote winner: {str(e)}")
435
+ raise HTTPException(status_code=500, detail=str(e))
436
+
437
+
438
+ @router.get("/ab-test/active", response_model=List[Dict[str, Any]])
439
+ async def list_active_ab_tests(
440
+ current_user: Dict = Depends(get_current_user)
441
+ ):
442
+ """List all active A/B tests."""
443
+ try:
444
+ ab_framework = await get_ab_testing()
445
+ active_tests = await ab_framework.list_active_tests()
446
+
447
+ return active_tests
448
+
449
+ except Exception as e:
450
+ logger.error(f"Failed to list active tests: {str(e)}")
451
+ raise HTTPException(status_code=500, detail=str(e))
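The A/B-testing endpoints above compose into a simple lifecycle: create, start, allocate per request, record outcomes, then inspect status. The sketch below walks that lifecycle over HTTP; the base URL, token, and model identifiers are placeholders, and the caller is responsible for mapping the allocated model back to "model_a"/"model_b" when recording.

# Sketch of the A/B-test lifecycle exposed above (placeholder URL, token, and model IDs).
import httpx

client = httpx.Client(
    base_url="http://localhost:8000/api/v1/ml",     # placeholder deployment URL
    headers={"Authorization": "Bearer <token>"},    # placeholder credential
)

# 1. Create and start a test (required ABTestRequest fields plus a strategy override).
client.post("/ab-test/create", json={
    "test_name": "anomaly_v1_vs_v2",
    "model_a_id": "anomaly_isolation_forest", "model_a_version": 1,   # placeholder model
    "model_b_id": "anomaly_isolation_forest", "model_b_version": 2,   # placeholder model
    "allocation_strategy": "thompson_sampling",
})
client.post("/ab-test/anomaly_v1_vs_v2/start")

# 2. Per prediction: ask for an allocation, serve it, then record the outcome.
allocation = client.get("/ab-test/anomaly_v1_vs_v2/allocate", params={"user_id": "user-123"}).json()
client.post("/ab-test/anomaly_v1_vs_v2/record", json={
    "model_selection": "model_a",   # caller maps allocation["model_id"]/["version"] to an arm
    "success": True,
})

# 3. Inspect accumulated results and the latest significance analysis.
print(client.get("/ab-test/anomaly_v1_vs_v2/status").json())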
src/ml/__init__.py CHANGED
@@ -1,19 +1,37 @@
- """Machine Learning models and utilities for Cidadao.AI.
-
- This module provides ML capabilities including:
- - Anomaly detection algorithms
- - Pattern analysis and correlation detection
- - Predictive models for spending analysis
+ """
+ ML Pipeline Module
 
- Status: Stub implementation - Full ML models planned for enhancement phase.
+ This module provides machine learning capabilities including:
+ - Model training pipeline
+ - Model versioning
+ - A/B testing framework
  """
 
- from .anomaly_detector import AnomalyDetector
- from .pattern_analyzer import PatternAnalyzer
- from .models import MLModel
+ from src.ml.training_pipeline import (
+     MLTrainingPipeline,
+     training_pipeline,
+     get_training_pipeline
+ )
+
+ from src.ml.ab_testing import (
+     ABTestFramework,
+     ABTestStatus,
+     TrafficAllocationStrategy,
+     ab_testing,
+     get_ab_testing
+ )
+
 
  __all__ = [
-     "AnomalyDetector",
-     "PatternAnalyzer",
-     "MLModel"
+     # Training Pipeline
+     "MLTrainingPipeline",
+     "training_pipeline",
+     "get_training_pipeline",
+
+     # A/B Testing
+     "ABTestFramework",
+     "ABTestStatus",
+     "TrafficAllocationStrategy",
+     "ab_testing",
+     "get_ab_testing"
  ]
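Beyond the HTTP routes, the re-exports above also allow in-process access to the same singletons. A small sketch, assuming an async caller and that the pipeline has already registered models:

# In-process usage sketch of the re-exported accessors (assumes an async caller).
from src.ml import get_training_pipeline, get_ab_testing

async def summarize_ml_state() -> dict:
    pipeline = await get_training_pipeline()
    ab = await get_ab_testing()
    return {
        "registered_models": len(pipeline.model_registry),  # same registry the /models route reads
        "active_ab_tests": len(await ab.list_active_tests()),
    }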
src/ml/ab_testing.py ADDED
@@ -0,0 +1,512 @@
1
+ """
2
+ A/B Testing Framework for ML Models
3
+
4
+ This module provides A/B testing capabilities for comparing model
5
+ performance in production environments.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import random
11
+ from datetime import datetime, timedelta
12
+ from typing import Dict, Any, List, Optional, Tuple, Union
13
+ from enum import Enum
14
+ import numpy as np
15
+ from scipy import stats
16
+
17
+ from src.core import get_logger
18
+ from src.infrastructure.cache.redis_client import get_redis_client
19
+ from src.ml.training_pipeline import training_pipeline
20
+
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
+ class ABTestStatus(Enum):
26
+ """Status of an A/B test."""
27
+ DRAFT = "draft"
28
+ RUNNING = "running"
29
+ PAUSED = "paused"
30
+ COMPLETED = "completed"
31
+ STOPPED = "stopped"
32
+
33
+
34
+ class TrafficAllocationStrategy(Enum):
35
+ """Strategy for allocating traffic between models."""
36
+ RANDOM = "random"
37
+ WEIGHTED = "weighted"
38
+ EPSILON_GREEDY = "epsilon_greedy"
39
+ THOMPSON_SAMPLING = "thompson_sampling"
40
+
41
+
42
+ class ABTestFramework:
43
+ """
44
+ A/B Testing framework for ML models.
45
+
46
+ Features:
47
+ - Multiple allocation strategies
48
+ - Statistical significance testing
49
+ - Real-time performance tracking
50
+ - Automatic winner selection
51
+ - Gradual rollout support
52
+ """
53
+
54
+ def __init__(self):
55
+ """Initialize the A/B testing framework."""
56
+ self.active_tests = {}
57
+ self.test_results = {}
58
+
59
+ async def create_test(
60
+ self,
61
+ test_name: str,
62
+ model_a: Tuple[str, Optional[int]], # (model_id, version)
63
+ model_b: Tuple[str, Optional[int]],
64
+ allocation_strategy: TrafficAllocationStrategy = TrafficAllocationStrategy.RANDOM,
65
+ traffic_split: Tuple[float, float] = (0.5, 0.5),
66
+ success_metric: str = "f1_score",
67
+ minimum_sample_size: int = 1000,
68
+ significance_level: float = 0.05,
69
+ auto_stop: bool = True,
70
+ duration_hours: Optional[int] = None
71
+ ) -> Dict[str, Any]:
72
+ """
73
+ Create a new A/B test.
74
+
75
+ Args:
76
+ test_name: Unique name for the test
77
+ model_a: Model A (control) - (model_id, version)
78
+ model_b: Model B (treatment) - (model_id, version)
79
+ allocation_strategy: Traffic allocation strategy
80
+ traffic_split: Traffic split between models (must sum to 1.0)
81
+ success_metric: Metric to optimize
82
+ minimum_sample_size: Minimum samples before analysis
83
+ significance_level: Statistical significance threshold
84
+ auto_stop: Automatically stop when winner found
85
+ duration_hours: Maximum test duration
86
+
87
+ Returns:
88
+ Test configuration
89
+ """
90
+ if test_name in self.active_tests:
91
+ raise ValueError(f"Test {test_name} already exists")
92
+
93
+ if abs(sum(traffic_split) - 1.0) > 0.001:
94
+ raise ValueError("Traffic split must sum to 1.0")
95
+
96
+ # Load models to verify they exist
97
+ await training_pipeline.load_model(*model_a)
98
+ await training_pipeline.load_model(*model_b)
99
+
100
+ test_config = {
101
+ "test_id": f"ab_test_{test_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
102
+ "test_name": test_name,
103
+ "model_a": {"model_id": model_a[0], "version": model_a[1]},
104
+ "model_b": {"model_id": model_b[0], "version": model_b[1]},
105
+ "allocation_strategy": allocation_strategy.value,
106
+ "traffic_split": traffic_split,
107
+ "success_metric": success_metric,
108
+ "minimum_sample_size": minimum_sample_size,
109
+ "significance_level": significance_level,
110
+ "auto_stop": auto_stop,
111
+ "status": ABTestStatus.DRAFT.value,
112
+ "created_at": datetime.now().isoformat(),
113
+ "start_time": None,
114
+ "end_time": None,
115
+ "duration_hours": duration_hours,
116
+ "results": {
117
+ "model_a": {"predictions": 0, "successes": 0, "metrics": {}},
118
+ "model_b": {"predictions": 0, "successes": 0, "metrics": {}}
119
+ }
120
+ }
121
+
122
+ # Initialize allocation strategy specific params
123
+ if allocation_strategy == TrafficAllocationStrategy.EPSILON_GREEDY:
124
+ test_config["epsilon"] = 0.1 # 10% exploration
125
+ elif allocation_strategy == TrafficAllocationStrategy.THOMPSON_SAMPLING:
126
+ test_config["thompson_params"] = {
127
+ "model_a": {"alpha": 1, "beta": 1},
128
+ "model_b": {"alpha": 1, "beta": 1}
129
+ }
130
+
131
+ self.active_tests[test_name] = test_config
132
+
133
+ # Save to Redis
134
+ await self._save_test_config(test_config)
135
+
136
+ logger.info(f"Created A/B test: {test_name}")
137
+ return test_config
138
+
139
+ async def start_test(self, test_name: str) -> bool:
140
+ """Start an A/B test."""
141
+ if test_name not in self.active_tests:
142
+ # Try to load from Redis
143
+ test_config = await self._load_test_config(test_name)
144
+ if not test_config:
145
+ raise ValueError(f"Test {test_name} not found")
146
+ self.active_tests[test_name] = test_config
147
+
148
+ test_config = self.active_tests[test_name]
149
+
150
+ if test_config["status"] not in [ABTestStatus.DRAFT.value, ABTestStatus.PAUSED.value]:
151
+ raise ValueError(f"Cannot start test in status {test_config['status']}")
152
+
153
+ test_config["status"] = ABTestStatus.RUNNING.value
154
+ test_config["start_time"] = datetime.now().isoformat()
155
+
156
+ await self._save_test_config(test_config)
157
+
158
+ logger.info(f"Started A/B test: {test_name}")
159
+ return True
160
+
161
+ async def allocate_model(
162
+ self,
163
+ test_name: str,
164
+ user_id: Optional[str] = None
165
+ ) -> Tuple[str, int]:
166
+ """
167
+ Allocate a model for a user based on the test configuration.
168
+
169
+ Args:
170
+ test_name: Test name
171
+ user_id: User identifier for consistent allocation
172
+
173
+ Returns:
174
+ Tuple of (model_id, version)
175
+ """
176
+ test_config = self.active_tests.get(test_name)
177
+ if not test_config:
178
+ test_config = await self._load_test_config(test_name)
179
+ if not test_config:
180
+ raise ValueError(f"Test {test_name} not found")
181
+
182
+ if test_config["status"] != ABTestStatus.RUNNING.value:
183
+ raise ValueError(f"Test {test_name} is not running")
184
+
185
+ # Select model based on allocation strategy
186
+ strategy = TrafficAllocationStrategy(test_config["allocation_strategy"])
187
+
188
+ if strategy == TrafficAllocationStrategy.RANDOM:
189
+ selected = await self._random_allocation(test_config, user_id)
190
+ elif strategy == TrafficAllocationStrategy.WEIGHTED:
191
+ selected = await self._weighted_allocation(test_config)
192
+ elif strategy == TrafficAllocationStrategy.EPSILON_GREEDY:
193
+ selected = await self._epsilon_greedy_allocation(test_config)
194
+ elif strategy == TrafficAllocationStrategy.THOMPSON_SAMPLING:
195
+ selected = await self._thompson_sampling_allocation(test_config)
196
+ else:
197
+ selected = "model_a" # Default fallback
198
+
199
+ # Return model info
200
+ model_info = test_config[selected]
201
+ return (model_info["model_id"], model_info["version"])
202
+
203
+ async def _random_allocation(
204
+ self,
205
+ test_config: Dict[str, Any],
206
+ user_id: Optional[str] = None
207
+ ) -> str:
208
+ """Random allocation with optional user-based consistency."""
209
+ if user_id:
210
+ # Hash user_id for consistent allocation
211
+ hash_val = hash(user_id + test_config["test_id"]) % 100
212
+ threshold = test_config["traffic_split"][0] * 100
213
+ return "model_a" if hash_val < threshold else "model_b"
214
+ else:
215
+ # Pure random
216
+ return "model_a" if random.random() < test_config["traffic_split"][0] else "model_b"
217
+
218
+ async def _weighted_allocation(self, test_config: Dict[str, Any]) -> str:
219
+ """Weighted allocation based on traffic split."""
220
+ return np.random.choice(
221
+ ["model_a", "model_b"],
222
+ p=test_config["traffic_split"]
223
+ )
224
+
225
+ async def _epsilon_greedy_allocation(self, test_config: Dict[str, Any]) -> str:
226
+ """Epsilon-greedy allocation (explore vs exploit)."""
227
+ epsilon = test_config.get("epsilon", 0.1)
228
+
229
+ if random.random() < epsilon:
230
+ # Explore
231
+ return random.choice(["model_a", "model_b"])
232
+ else:
233
+ # Exploit - choose best performing
234
+ results = test_config["results"]
235
+ rate_a = (results["model_a"]["successes"] /
236
+ max(results["model_a"]["predictions"], 1))
237
+ rate_b = (results["model_b"]["successes"] /
238
+ max(results["model_b"]["predictions"], 1))
239
+
240
+ return "model_a" if rate_a >= rate_b else "model_b"
241
+
242
+ async def _thompson_sampling_allocation(self, test_config: Dict[str, Any]) -> str:
243
+ """Thompson sampling allocation (Bayesian approach)."""
244
+ params = test_config["thompson_params"]
245
+
246
+ # Sample from Beta distributions
247
+ sample_a = np.random.beta(params["model_a"]["alpha"], params["model_a"]["beta"])
248
+ sample_b = np.random.beta(params["model_b"]["alpha"], params["model_b"]["beta"])
249
+
250
+ return "model_a" if sample_a >= sample_b else "model_b"
251
+
252
+ async def record_prediction(
253
+ self,
254
+ test_name: str,
255
+ model_selection: str, # "model_a" or "model_b"
256
+ success: bool,
257
+ prediction_metadata: Optional[Dict[str, Any]] = None
258
+ ):
259
+ """
260
+ Record a prediction result for the test.
261
+
262
+ Args:
263
+ test_name: Test name
264
+ model_selection: Which model was used
265
+ success: Whether prediction was successful
266
+ prediction_metadata: Additional metadata
267
+ """
268
+ test_config = self.active_tests.get(test_name)
269
+ if not test_config:
270
+ test_config = await self._load_test_config(test_name)
271
+ if not test_config:
272
+ raise ValueError(f"Test {test_name} not found")
273
+
274
+ # Update results
275
+ results = test_config["results"][model_selection]
276
+ results["predictions"] += 1
277
+ if success:
278
+ results["successes"] += 1
279
+
280
+ # Update Thompson sampling parameters if applicable
281
+ if test_config["allocation_strategy"] == TrafficAllocationStrategy.THOMPSON_SAMPLING.value:
282
+ params = test_config["thompson_params"][model_selection]
283
+ if success:
284
+ params["alpha"] += 1
285
+ else:
286
+ params["beta"] += 1
287
+
288
+ # Save updated config
289
+ await self._save_test_config(test_config)
290
+
291
+ # Check if we should analyze results
292
+ total_predictions = (test_config["results"]["model_a"]["predictions"] +
293
+ test_config["results"]["model_b"]["predictions"])
294
+
295
+ if total_predictions >= test_config["minimum_sample_size"]:
296
+ analysis = await self.analyze_test(test_name)
297
+
298
+ if test_config["auto_stop"] and analysis.get("winner"):
299
+ await self.stop_test(test_name, reason="Winner found")
300
+
301
+ async def analyze_test(self, test_name: str) -> Dict[str, Any]:
302
+ """
303
+ Analyze test results for statistical significance.
304
+
305
+ Returns:
306
+ Analysis results including winner if found
307
+ """
308
+ test_config = self.active_tests.get(test_name)
309
+ if not test_config:
310
+ test_config = await self._load_test_config(test_name)
311
+ if not test_config:
312
+ raise ValueError(f"Test {test_name} not found")
313
+
314
+ results_a = test_config["results"]["model_a"]
315
+ results_b = test_config["results"]["model_b"]
316
+
317
+ # Calculate conversion rates
318
+ rate_a = results_a["successes"] / max(results_a["predictions"], 1)
319
+ rate_b = results_b["successes"] / max(results_b["predictions"], 1)
320
+
321
+ # Perform chi-square test
322
+ contingency_table = np.array([
323
+ [results_a["successes"], results_a["predictions"] - results_a["successes"]],
324
+ [results_b["successes"], results_b["predictions"] - results_b["successes"]]
325
+ ])
326
+
327
+ chi2, p_value, dof, expected = stats.chi2_contingency(contingency_table)
328
+
329
+ # Calculate confidence intervals
330
+ ci_a = self._calculate_confidence_interval(
331
+ results_a["successes"], results_a["predictions"]
332
+ )
333
+ ci_b = self._calculate_confidence_interval(
334
+ results_b["successes"], results_b["predictions"]
335
+ )
336
+
337
+ # Determine winner
338
+ winner = None
339
+ if p_value < test_config["significance_level"]:
340
+ winner = "model_a" if rate_a > rate_b else "model_b"
341
+
342
+ # Calculate lift
343
+ lift = ((rate_b - rate_a) / rate_a * 100) if rate_a > 0 else 0
344
+
345
+ analysis = {
346
+ "model_a": {
347
+ "conversion_rate": rate_a,
348
+ "confidence_interval": ci_a,
349
+ "sample_size": results_a["predictions"]
350
+ },
351
+ "model_b": {
352
+ "conversion_rate": rate_b,
353
+ "confidence_interval": ci_b,
354
+ "sample_size": results_b["predictions"]
355
+ },
356
+ "p_value": p_value,
357
+ "chi_square": chi2,
358
+ "significant": p_value < test_config["significance_level"],
359
+ "winner": winner,
360
+ "lift": lift,
361
+ "analysis_time": datetime.now().isoformat()
362
+ }
363
+
364
+ # Update test config with latest analysis
365
+ test_config["latest_analysis"] = analysis
366
+ await self._save_test_config(test_config)
367
+
368
+ return analysis
369
+
370
+ def _calculate_confidence_interval(
371
+ self,
372
+ successes: int,
373
+ total: int,
374
+ confidence_level: float = 0.95
375
+ ) -> Tuple[float, float]:
376
+ """Calculate confidence interval for conversion rate."""
377
+ if total == 0:
378
+ return (0.0, 0.0)
379
+
380
+ rate = successes / total
381
+ z = stats.norm.ppf((1 + confidence_level) / 2)
382
+
383
+ # Wilson score interval
384
+ denominator = 1 + z**2 / total
385
+ center = (rate + z**2 / (2 * total)) / denominator
386
+ margin = z * np.sqrt(rate * (1 - rate) / total + z**2 / (4 * total**2)) / denominator
387
+
388
+ return (max(0, center - margin), min(1, center + margin))
389
+
390
+ async def stop_test(self, test_name: str, reason: str = "Manual stop") -> bool:
391
+ """Stop an A/B test."""
392
+ test_config = self.active_tests.get(test_name)
393
+ if not test_config:
394
+ test_config = await self._load_test_config(test_name)
395
+ if not test_config:
396
+ raise ValueError(f"Test {test_name} not found")
397
+
398
+ test_config["status"] = ABTestStatus.STOPPED.value
399
+ test_config["end_time"] = datetime.now().isoformat()
400
+ test_config["stop_reason"] = reason
401
+
402
+ # Perform final analysis
403
+ final_analysis = await self.analyze_test(test_name)
404
+ test_config["final_analysis"] = final_analysis
405
+
406
+ await self._save_test_config(test_config)
407
+
408
+ # Move to completed tests
409
+ self.test_results[test_name] = test_config
410
+ if test_name in self.active_tests:
411
+ del self.active_tests[test_name]
412
+
413
+ logger.info(f"Stopped A/B test {test_name}: {reason}")
414
+ return True
415
+
416
+ async def get_test_status(self, test_name: str) -> Dict[str, Any]:
417
+ """Get current status of a test."""
418
+ test_config = self.active_tests.get(test_name)
419
+ if not test_config:
420
+ test_config = await self._load_test_config(test_name)
421
+ if not test_config:
422
+ raise ValueError(f"Test {test_name} not found")
423
+
424
+ # Add runtime if running
425
+ if test_config["status"] == ABTestStatus.RUNNING.value and test_config["start_time"]:
426
+ start = datetime.fromisoformat(test_config["start_time"])
427
+ runtime = (datetime.now() - start).total_seconds() / 3600
428
+ test_config["runtime_hours"] = runtime
429
+
430
+ # Check if should auto-stop due to duration
431
+ if test_config.get("duration_hours") and runtime >= test_config["duration_hours"]:
432
+ await self.stop_test(test_name, reason="Duration limit reached")
433
+
434
+ return test_config
435
+
436
+ async def promote_winner(self, test_name: str) -> bool:
437
+ """Promote the winning model to production."""
438
+ test_config = self.test_results.get(test_name)
439
+ if not test_config:
440
+ # Try loading completed test
441
+ test_config = await self._load_test_config(test_name)
442
+ if not test_config or test_config["status"] != ABTestStatus.STOPPED.value:
443
+ raise ValueError(f"Test {test_name} not completed")
444
+
445
+ final_analysis = test_config.get("final_analysis", {})
446
+ winner = final_analysis.get("winner")
447
+
448
+ if not winner:
449
+ raise ValueError(f"No winner found for test {test_name}")
450
+
451
+ # Promote winning model
452
+ model_info = test_config[winner]
453
+ success = await training_pipeline.promote_model(
454
+ model_info["model_id"],
455
+ model_info["version"],
456
+ "production"
457
+ )
458
+
459
+ if success:
460
+ logger.info(f"Promoted {winner} from test {test_name} to production")
461
+
462
+ return success
463
+
464
+ async def _save_test_config(self, test_config: Dict[str, Any]):
465
+ """Save test configuration to Redis."""
466
+ redis_client = await get_redis_client()
467
+ key = f"ab_test:{test_config['test_name']}"
468
+ await redis_client.set(
469
+ key,
470
+ json.dumps(test_config),
471
+ ex=86400 * 90 # 90 days
472
+ )
473
+
474
+ async def _load_test_config(self, test_name: str) -> Optional[Dict[str, Any]]:
475
+ """Load test configuration from Redis."""
476
+ redis_client = await get_redis_client()
477
+ key = f"ab_test:{test_name}"
478
+ data = await redis_client.get(key)
479
+ return json.loads(data) if data else None
480
+
481
+ async def list_active_tests(self) -> List[Dict[str, Any]]:
482
+ """List all active tests."""
483
+ # Load from Redis pattern
484
+ redis_client = await get_redis_client()
485
+ keys = await redis_client.keys("ab_test:*")
486
+
487
+ active_tests = []
488
+ for key in keys:
489
+ data = await redis_client.get(key)
490
+ if data:
491
+ test_config = json.loads(data)
492
+ if test_config["status"] in [ABTestStatus.RUNNING.value, ABTestStatus.PAUSED.value]:
493
+ active_tests.append({
494
+ "test_name": test_config["test_name"],
495
+ "status": test_config["status"],
496
+ "model_a": test_config["model_a"]["model_id"],
497
+ "model_b": test_config["model_b"]["model_id"],
498
+ "start_time": test_config.get("start_time"),
499
+ "predictions": (test_config["results"]["model_a"]["predictions"] +
500
+ test_config["results"]["model_b"]["predictions"])
501
+ })
502
+
503
+ return active_tests
504
+
505
+
506
+ # Global A/B testing framework instance
507
+ ab_testing = ABTestFramework()
508
+
509
+
510
+ async def get_ab_testing() -> ABTestFramework:
511
+ """Get the global A/B testing framework instance."""
512
+ return ab_testing
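Before the training-pipeline diff, a standalone sketch of the statistics `ABTestFramework.analyze_test` applies: a chi-square test on the 2x2 success/failure contingency table plus a Wilson score interval per arm. The counts below are invented purely for illustration.

# Worked example of the analyze_test statistics on invented counts.
import numpy as np
from scipy import stats

succ_a, n_a = 480, 9600   # model_a: successes, predictions (illustrative only)
succ_b, n_b = 560, 9400   # model_b: successes, predictions (illustrative only)

# Chi-square test on the 2x2 contingency table, as in analyze_test.
table = np.array([[succ_a, n_a - succ_a],
                  [succ_b, n_b - succ_b]])
chi2, p_value, dof, _expected = stats.chi2_contingency(table)

def wilson_interval(successes: int, total: int, confidence: float = 0.95):
    """Wilson score interval, mirroring _calculate_confidence_interval above."""
    if total == 0:
        return (0.0, 0.0)
    rate = successes / total
    z = stats.norm.ppf((1 + confidence) / 2)
    denom = 1 + z**2 / total
    center = (rate + z**2 / (2 * total)) / denom
    margin = z * np.sqrt(rate * (1 - rate) / total + z**2 / (4 * total**2)) / denom
    return (max(0.0, center - margin), min(1.0, center + margin))

print(f"p-value: {p_value:.4f} (significant at 0.05: {p_value < 0.05})")
print("model_a CI:", wilson_interval(succ_a, n_a))
print("model_b CI:", wilson_interval(succ_b, n_b))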
src/ml/training_pipeline.py CHANGED
@@ -1,813 +1,523 @@
1
  """
2
- Pipeline de Treinamento para Cidadão.AI
3
 
4
- Sistema completo de fine-tuning especializado para dados de transparência pública brasileira.
5
- Inspirado nas técnicas do Kimi K2, mas otimizado para análise governamental.
6
  """
7
 
 
 
8
  import os
9
- from src.core import json_utils
10
- import torch
11
- import torch.nn as nn
12
- from torch.utils.data import Dataset, DataLoader
13
- from torch.optim import AdamW
14
- from torch.optim.lr_scheduler import CosineAnnealingLR
15
- from transformers import AutoTokenizer, get_linear_schedule_with_warmup
16
- from typing import Dict, List, Optional, Tuple, Any
17
- import pandas as pd
18
- import numpy as np
19
  from pathlib import Path
20
- import logging
21
- from dataclasses import dataclass, asdict
22
- from tqdm import tqdm
23
- import wandb
24
- from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
25
- import matplotlib.pyplot as plt
26
- import seaborn as sns
27
-
28
- from .cidadao_model import CidadaoAIForTransparency, CidadaoModelConfig, create_cidadao_model
29
 
30
- logger = logging.getLogger(__name__)
31
 
32
 
33
- @dataclass
34
- class TrainingConfig:
35
- """Configuração de treinamento"""
36
-
37
- # Hiperparâmetros principais
38
- learning_rate: float = 2e-5
39
- batch_size: int = 8
40
- num_epochs: int = 10
41
- warmup_steps: int = 1000
42
- max_grad_norm: float = 1.0
43
- weight_decay: float = 0.01
44
-
45
- # Configurações de dados
46
- max_sequence_length: int = 512
47
- train_split: float = 0.8
48
- val_split: float = 0.1
49
- test_split: float = 0.1
50
-
51
- # Configurações do modelo
52
- model_size: str = "medium"
53
- specialized_tasks: List[str] = None
54
- use_mixed_precision: bool = True
55
- gradient_accumulation_steps: int = 4
56
-
57
- # Configurações de checkpoint
58
- save_strategy: str = "epoch" # "steps" ou "epoch"
59
- save_steps: int = 500
60
- eval_steps: int = 100
61
- logging_steps: int = 50
62
- output_dir: str = "./models/cidadao-gpt"
63
-
64
- # Configurações de avaliação
65
- eval_strategy: str = "steps"
66
- metric_for_best_model: str = "eval_f1"
67
- greater_is_better: bool = True
68
- early_stopping_patience: int = 3
69
-
70
- # Configurações de experimentação
71
- experiment_name: str = "cidadao-gpt-v1"
72
- use_wandb: bool = True
73
- wandb_project: str = "cidadao-ai"
74
-
75
- def __post_init__(self):
76
- if self.specialized_tasks is None:
77
- self.specialized_tasks = ["all"]
78
 
79
 
80
- class TransparencyDataset(Dataset):
81
- """Dataset especializado para dados de transparência pública"""
82
 
83
- def __init__(
84
- self,
85
- data_path: str,
86
- tokenizer: AutoTokenizer,
87
- max_length: int = 512,
88
- task_type: str = "multi_task"
89
- ):
90
- self.tokenizer = tokenizer
91
- self.max_length = max_length
92
- self.task_type = task_type
93
-
94
- # Carregar dados
95
- self.data = self._load_data(data_path)
96
-
97
- # Preparar vocabulário especializado
98
- self._prepare_specialized_vocab()
99
-
100
- def _load_data(self, data_path: str) -> List[Dict]:
101
- """Carregar dados de transparência"""
102
-
103
- data_file = Path(data_path)
104
 
105
- if data_file.suffix == '.json':
106
- with open(data_file, 'r', encoding='utf-8') as f:
107
- data = json_utils.load(f)
108
- elif data_file.suffix == '.jsonl':
109
- data = []
110
- with open(data_file, 'r', encoding='utf-8') as f:
111
- for line in f:
112
- data.append(json_utils.loads(line))
113
- else:
114
- # Assumir dados do Portal da Transparência em formato estruturado
115
- data = self._load_transparency_data(data_path)
116
 
117
- logger.info(f"Carregados {len(data)} exemplos de {data_path}")
118
- return data
119
-
120
- def _load_transparency_data(self, data_path: str) -> List[Dict]:
121
- """Carregar dados reais do Portal da Transparência"""
122
-
123
- # Simular estrutura de dados reais
124
- # Em produção, isso seria conectado ao pipeline de dados real
125
- sample_data = []
126
-
127
- # Exemplos de contratos com diferentes tipos de problemas
128
- contract_examples = [
129
- {
130
- "text": "Contrato para aquisição de equipamentos médicos no valor de R$ 2.500.000,00 firmado entre Ministério da Saúde e Empresa XYZ LTDA. Processo licitatório 12345/2024, modalidade pregão eletrônico.",
131
- "anomaly_label": 0, # Normal
132
- "financial_risk": 2, # Médio
133
- "legal_compliance": 1, # Conforme
134
- "contract_value": 2500000.0,
135
- "entity_types": [1, 2, 3], # Ministério, Empresa, Equipamento
136
- "corruption_indicators": []
137
- },
138
- {
139
- "text": "Contrato emergencial sem licitação para fornecimento de insumos hospitalares. Valor: R$ 15.000.000,00. Empresa beneficiária: Alpha Beta Comercial S.A., CNPJ com irregularidades na Receita Federal.",
140
- "anomaly_label": 2, # Anômalo
141
- "financial_risk": 4, # Alto
142
- "legal_compliance": 0, # Não conforme
143
- "contract_value": 15000000.0,
144
- "entity_types": [1, 2, 4], # Ministério, Empresa, Insumos
145
- "corruption_indicators": [1, 3, 5] # Emergencial, Sem licitação, CNPJ irregular
146
- }
147
- ]
148
-
149
- # Amplificar dados com variações
150
- for base_example in contract_examples:
151
- for i in range(50): # 50 variações de cada exemplo
152
- example = base_example.copy()
153
- example["id"] = f"{len(sample_data)}"
154
 
155
- # Adicionar ruído realístico
156
- if np.random.random() > 0.5:
157
- example["text"] = self._add_realistic_variations(example["text"])
158
 
159
- sample_data.append(example)
160
-
161
- return sample_data
162
 
163
- def _add_realistic_variations(self, text: str) -> str:
164
- """Adicionar variações realísticas ao texto"""
165
 
166
- variations = [
167
- text.replace("Ministério da Saúde", "MS"),
168
- text.replace("equipamentos médicos", "equipamentos hospitalares"),
169
- text.replace("pregão eletrônico", "concorrência pública"),
170
- text + " Processo administrativo arquivado em sistema SIASG.",
171
- text + " Valor atualizado conforme INPC/IBGE."
172
- ]
173
 
174
- return np.random.choice(variations)
175
 
176
- def _prepare_specialized_vocab(self):
177
- """Preparar vocabulário especializado para transparência"""
178
-
179
- # Termos técnicos de transparência pública
180
- self.transparency_terms = {
181
- # Entidades
182
- "ministerio", "secretaria", "orgao", "entidade", "empresa", "fornecedor",
183
-
184
- # Tipos de contrato
185
- "licitacao", "pregao", "concorrencia", "tomada_precos", "convite", "dispensa",
186
-
187
- # Indicadores financeiros
188
- "valor", "preco", "orcamento", "pagamento", "repasse", "empenho",
189
-
190
- # Termos jurídicos
191
- "conformidade", "irregularidade", "infração", "penalidade", "multa",
192
-
193
- # Indicadores de corrupção
194
- "superfaturamento", "direcionamento", "cartel", "fraude", "peculato"
195
  }
196
 
197
- # Adicionar tokens especiais se necessário
198
- special_tokens = ["[CONTRACT]", "[ENTITY]", "[VALUE]", "[ANOMALY]", "[LEGAL]"]
199
- self.tokenizer.add_special_tokens({"additional_special_tokens": special_tokens})
200
-
201
- def __len__(self) -> int:
202
- return len(self.data)
203
-
204
- def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
205
- item = self.data[idx]
206
-
207
- # Tokenizar texto
208
- encoding = self.tokenizer(
209
- item["text"],
210
- truncation=True,
211
- padding="max_length",
212
- max_length=self.max_length,
213
- return_tensors="pt"
214
- )
215
 
216
- # Preparar labels e features especializadas
217
- result = {
218
- "input_ids": encoding["input_ids"].squeeze(),
219
- "attention_mask": encoding["attention_mask"].squeeze(),
220
  }
221
 
222
- # Adicionar labels específicos por tarefa
223
- if "anomaly_label" in item:
224
- result["anomaly_labels"] = torch.tensor(item["anomaly_label"], dtype=torch.long)
225
-
226
- if "financial_risk" in item:
227
- result["financial_risk_labels"] = torch.tensor(item["financial_risk"], dtype=torch.long)
228
-
229
- if "legal_compliance" in item:
230
- result["legal_compliance_labels"] = torch.tensor(item["legal_compliance"], dtype=torch.long)
231
 
232
- # Adicionar features especializadas
233
- if "entity_types" in item:
234
- entity_types = torch.zeros(self.max_length, dtype=torch.long)
235
- for i, entity_type in enumerate(item["entity_types"][:self.max_length]):
236
- entity_types[i] = entity_type
237
- result["entity_types"] = entity_types
238
-
239
- if "corruption_indicators" in item:
240
- corruption_indicators = torch.zeros(self.max_length, dtype=torch.long)
241
- for i, indicator in enumerate(item["corruption_indicators"][:self.max_length]):
242
- corruption_indicators[i] = indicator
243
- result["corruption_indicators"] = corruption_indicators
244
-
245
- return result
246
-
247
-
248
- class CidadaoTrainer:
249
- """Trainer especializado para Cidadão.AI"""
250
 
251
- def __init__(
252
  self,
253
- model: CidadaoAIForTransparency,
254
- tokenizer: AutoTokenizer,
255
- config: TrainingConfig
256
- ):
257
- self.model = model
258
- self.tokenizer = tokenizer
259
- self.config = config
260
 
261
- # Configurar device
262
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
263
- self.model.to(self.device)
264
 
265
- # Configurar otimizador
266
- self.optimizer = AdamW(
267
- self.model.parameters(),
268
- lr=config.learning_rate,
269
- weight_decay=config.weight_decay
270
- )
271
 
272
- # Configurar mixed precision se disponível
273
- self.scaler = torch.cuda.amp.GradScaler() if config.use_mixed_precision else None
274
 
275
- # Métricas de treinamento
276
- self.training_history = {
277
- "train_loss": [],
278
- "eval_loss": [],
279
- "eval_metrics": []
280
  }
281
-
282
- # Early stopping
283
- self.best_metric = float('-inf') if config.greater_is_better else float('inf')
284
- self.patience_counter = 0
285
-
286
- # Configurar logging
287
- if config.use_wandb:
288
- wandb.init(
289
- project=config.wandb_project,
290
- name=config.experiment_name,
291
- config=asdict(config)
292
- )
293
 
294
- def train(
295
  self,
296
- train_dataset: TransparencyDataset,
297
- eval_dataset: Optional[TransparencyDataset] = None,
298
- test_dataset: Optional[TransparencyDataset] = None
299
- ):
300
- """Executar treinamento completo"""
301
-
302
- logger.info("🚀 Iniciando treinamento do Cidadão.AI")
303
-
304
- # Preparar data loaders
305
- train_loader = DataLoader(
306
- train_dataset,
307
- batch_size=self.config.batch_size,
308
- shuffle=True,
309
- num_workers=4
310
- )
311
-
312
- eval_loader = None
313
- if eval_dataset:
314
- eval_loader = DataLoader(
315
- eval_dataset,
316
- batch_size=self.config.batch_size,
317
- shuffle=False,
318
- num_workers=4
319
  )
 
 
 
320
 
321
- # Configurar scheduler
322
- total_steps = len(train_loader) * self.config.num_epochs
323
- self.scheduler = get_linear_schedule_with_warmup(
324
- self.optimizer,
325
- num_warmup_steps=self.config.warmup_steps,
326
- num_training_steps=total_steps
327
- )
328
-
329
- # Loop de treinamento
330
- global_step = 0
331
 
332
- for epoch in range(self.config.num_epochs):
333
- logger.info(f"📚 Época {epoch + 1}/{self.config.num_epochs}")
334
-
335
- # Treinamento
336
- train_loss = self._train_epoch(train_loader, epoch, global_step)
337
- self.training_history["train_loss"].append(train_loss)
338
 
339
- # Avaliação
340
- if eval_loader and (epoch + 1) % 1 == 0: # Avaliar a cada época
341
- eval_metrics = self._evaluate(eval_loader, epoch)
342
- self.training_history["eval_metrics"].append(eval_metrics)
343
 
344
- # Early stopping check
345
- current_metric = eval_metrics[self.config.metric_for_best_model]
346
- if self._is_better_metric(current_metric):
347
- self.best_metric = current_metric
348
- self.patience_counter = 0
349
- self._save_checkpoint(epoch, is_best=True)
350
- logger.info(f"🎯 Novo melhor modelo! {self.config.metric_for_best_model}: {current_metric:.4f}")
351
- else:
352
- self.patience_counter += 1
 
 
353
 
354
- if self.patience_counter >= self.config.early_stopping_patience:
355
- logger.info(f" Early stopping acionado após {self.patience_counter} épocas sem melhoria")
356
  break
357
 
358
- # Salvar checkpoint regular
359
- if (epoch + 1) % 2 == 0: # Salvar a cada 2 épocas
360
- self._save_checkpoint(epoch, is_best=False)
361
-
362
- global_step += len(train_loader)
363
-
364
- # Avaliação final
365
- if test_dataset:
366
- test_loader = DataLoader(
367
- test_dataset,
368
- batch_size=self.config.batch_size,
369
- shuffle=False,
370
- num_workers=4
371
  )
372
 
373
- logger.info("🧪 Executando avaliação final no conjunto de teste")
374
- final_metrics = self._evaluate(test_loader, epoch=-1, is_test=True)
375
 
376
- logger.info("📊 Métricas finais:")
377
- for metric, value in final_metrics.items():
378
- logger.info(f" {metric}: {value:.4f}")
379
-
380
- # Finalizar treinamento
381
- self._finalize_training()
382
 
383
- def _train_epoch(self, train_loader: DataLoader, epoch: int, global_step: int) -> float:
384
- """Treinar uma época"""
385
-
386
- self.model.train()
387
- total_loss = 0.0
388
- progress_bar = tqdm(train_loader, desc=f"Treinamento Época {epoch + 1}")
389
-
390
- for step, batch in enumerate(progress_bar):
391
- # Mover dados para device
392
- batch = {k: v.to(self.device) for k, v in batch.items()}
393
-
394
- # Forward pass com mixed precision
395
- if self.scaler:
396
- with torch.cuda.amp.autocast():
397
- loss = self._compute_multi_task_loss(batch)
398
- else:
399
- loss = self._compute_multi_task_loss(batch)
 
 
 
 
 
400
 
401
- # Backward pass
402
- if self.scaler:
403
- self.scaler.scale(loss).backward()
404
 
405
- if (step + 1) % self.config.gradient_accumulation_steps == 0:
406
- self.scaler.unscale_(self.optimizer)
407
- torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
408
- self.scaler.step(self.optimizer)
409
- self.scaler.update()
410
- self.scheduler.step()
411
- self.optimizer.zero_grad()
412
- else:
413
- loss.backward()
414
 
415
- if (step + 1) % self.config.gradient_accumulation_steps == 0:
416
- torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.max_grad_norm)
417
- self.optimizer.step()
418
- self.scheduler.step()
419
- self.optimizer.zero_grad()
420
-
421
- total_loss += loss.item()
422
-
423
- # Logging
424
- if step % self.config.logging_steps == 0:
425
- avg_loss = total_loss / (step + 1)
426
- progress_bar.set_postfix({"loss": f"{avg_loss:.4f}"})
427
 
428
- if self.config.use_wandb:
429
- wandb.log({
430
- "train/loss": avg_loss,
431
- "train/learning_rate": self.scheduler.get_last_lr()[0],
432
- "train/epoch": epoch,
433
- "train/step": global_step + step
434
- })
435
-
436
- return total_loss / len(train_loader)
437
-
438
- def _compute_multi_task_loss(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
439
- """Computar loss multi-tarefa"""
440
-
441
- total_loss = 0.0
442
- loss_weights = {
443
- "anomaly": 1.0,
444
- "financial": 0.8,
445
- "legal": 0.6
446
- }
447
-
448
- # Loss de detecção de anomalias
449
- if "anomaly_labels" in batch:
450
- anomaly_outputs = self.model.detect_anomalies(
451
- input_ids=batch["input_ids"],
452
- attention_mask=batch["attention_mask"],
453
- entity_types=batch.get("entity_types"),
454
- corruption_indicators=batch.get("corruption_indicators")
455
- )
456
-
457
- # Extrair logits dos resultados
458
- anomaly_logits = []
459
- for pred in anomaly_outputs["predictions"]:
460
- probs = [
461
- pred["probabilities"]["normal"],
462
- pred["probabilities"]["suspicious"],
463
- pred["probabilities"]["anomalous"]
464
- ]
465
- anomaly_logits.append(probs)
466
-
467
- anomaly_logits = torch.tensor(anomaly_logits, device=self.device)
468
- anomaly_loss = nn.CrossEntropyLoss()(anomaly_logits, batch["anomaly_labels"])
469
- total_loss += loss_weights["anomaly"] * anomaly_loss
470
-
471
- # Loss de análise financeira
472
- if "financial_risk_labels" in batch:
473
- financial_outputs = self.model.analyze_financial_risk(
474
- input_ids=batch["input_ids"],
475
- attention_mask=batch["attention_mask"]
476
- )
477
-
478
- # Extrair logits dos resultados
479
- risk_logits = []
480
- for pred in financial_outputs["predictions"]:
481
- probs = list(pred["risk_probabilities"].values())
482
- risk_logits.append(probs)
483
-
484
- risk_logits = torch.tensor(risk_logits, device=self.device)
485
- financial_loss = nn.CrossEntropyLoss()(risk_logits, batch["financial_risk_labels"])
486
- total_loss += loss_weights["financial"] * financial_loss
487
-
488
- # Loss de conformidade legal
489
- if "legal_compliance_labels" in batch:
490
- legal_outputs = self.model.check_legal_compliance(
491
- input_ids=batch["input_ids"],
492
- attention_mask=batch["attention_mask"]
493
- )
494
-
495
- # Extrair logits dos resultados
496
- compliance_logits = []
497
- for pred in legal_outputs["predictions"]:
498
- probs = [
499
- pred["legal_analysis"]["non_compliant_prob"],
500
- pred["legal_analysis"]["compliant_prob"]
501
- ]
502
- compliance_logits.append(probs)
503
-
504
- compliance_logits = torch.tensor(compliance_logits, device=self.device)
505
- legal_loss = nn.CrossEntropyLoss()(compliance_logits, batch["legal_compliance_labels"])
506
- total_loss += loss_weights["legal"] * legal_loss
507
-
508
- return total_loss
509
-
510
- def _evaluate(self, eval_loader: DataLoader, epoch: int, is_test: bool = False) -> Dict[str, float]:
511
- """Avaliar modelo"""
512
-
513
- self.model.eval()
514
- total_loss = 0.0
515
-
516
- # Coletar predições e labels
517
- all_predictions = {
518
- "anomaly": {"preds": [], "labels": []},
519
- "financial": {"preds": [], "labels": []},
520
- "legal": {"preds": [], "labels": []}
521
- }
522
-
523
- with torch.no_grad():
524
- for batch in tqdm(eval_loader, desc="Avaliação"):
525
- batch = {k: v.to(self.device) for k, v in batch.items()}
526
 
527
- # Computar loss
528
- loss = self._compute_multi_task_loss(batch)
529
- total_loss += loss.item()
 
530
 
531
- # Coletar predições
532
- self._collect_predictions(batch, all_predictions)
533
-
534
- avg_loss = total_loss / len(eval_loader)
535
-
536
- # Computar métricas
537
- metrics = {"eval_loss": avg_loss}
538
-
539
- for task, preds_labels in all_predictions.items():
540
- if preds_labels["preds"]:
541
- task_metrics = self._compute_task_metrics(
542
- preds_labels["preds"],
543
- preds_labels["labels"],
544
- task_name=task
545
- )
546
- metrics.update(task_metrics)
547
-
548
- # Logging
549
- prefix = "test" if is_test else "eval"
550
- log_metrics = {f"{prefix}/{k}": v for k, v in metrics.items()}
551
-
552
- if self.config.use_wandb:
553
- wandb.log(log_metrics)
554
-
555
- return metrics
556
-
557
- def _collect_predictions(self, batch: Dict[str, torch.Tensor], all_predictions: Dict):
558
- """Coletar predições para avaliação"""
559
-
560
- # Anomaly detection
561
- if "anomaly_labels" in batch:
562
- anomaly_outputs = self.model.detect_anomalies(
563
- input_ids=batch["input_ids"],
564
- attention_mask=batch["attention_mask"]
565
- )
566
-
567
- for i, pred in enumerate(anomaly_outputs["predictions"]):
568
- anomaly_type_map = {"Normal": 0, "Suspeito": 1, "Anômalo": 2}
569
- pred_label = anomaly_type_map[pred["anomaly_type"]]
570
- all_predictions["anomaly"]["preds"].append(pred_label)
571
- all_predictions["anomaly"]["labels"].append(batch["anomaly_labels"][i].item())
572
-
573
- # Financial analysis
574
- if "financial_risk_labels" in batch:
575
- financial_outputs = self.model.analyze_financial_risk(
576
- input_ids=batch["input_ids"],
577
- attention_mask=batch["attention_mask"]
578
- )
579
-
580
- for i, pred in enumerate(financial_outputs["predictions"]):
581
- risk_level_map = {"Muito Baixo": 0, "Baixo": 1, "Médio": 2, "Alto": 3, "Muito Alto": 4}
582
- pred_label = risk_level_map[pred["risk_level"]]
583
- all_predictions["financial"]["preds"].append(pred_label)
584
- all_predictions["financial"]["labels"].append(batch["financial_risk_labels"][i].item())
585
-
586
- # Legal compliance
587
- if "legal_compliance_labels" in batch:
588
- legal_outputs = self.model.check_legal_compliance(
589
- input_ids=batch["input_ids"],
590
- attention_mask=batch["attention_mask"]
591
- )
592
-
593
- for i, pred in enumerate(legal_outputs["predictions"]):
594
- pred_label = 1 if pred["is_compliant"] else 0
595
- all_predictions["legal"]["preds"].append(pred_label)
596
- all_predictions["legal"]["labels"].append(batch["legal_compliance_labels"][i].item())
597
-
598
- def _compute_task_metrics(self, predictions: List, labels: List, task_name: str) -> Dict[str, float]:
599
- """Computar métricas para uma tarefa específica"""
600
-
601
- accuracy = accuracy_score(labels, predictions)
602
- precision, recall, f1, _ = precision_recall_fscore_support(
603
- labels, predictions, average='weighted'
604
- )
605
-
606
- metrics = {
607
- f"eval_{task_name}_accuracy": accuracy,
608
- f"eval_{task_name}_precision": precision,
609
- f"eval_{task_name}_recall": recall,
610
- f"eval_{task_name}_f1": f1
611
- }
612
-
613
- # Métrica composta para early stopping
614
- if task_name == "anomaly": # Usar anomaly como principal
615
- metrics["eval_f1"] = f1
616
-
617
- return metrics
618
-
619
- def _is_better_metric(self, current_metric: float) -> bool:
620
- """Verificar se métrica atual é melhor"""
621
- if self.config.greater_is_better:
622
- return current_metric > self.best_metric
623
- else:
624
- return current_metric < self.best_metric
625
-
626
- def _save_checkpoint(self, epoch: int, is_best: bool = False):
627
- """Salvar checkpoint do modelo"""
628
-
629
- output_dir = Path(self.config.output_dir)
630
- output_dir.mkdir(parents=True, exist_ok=True)
631
-
632
- if is_best:
633
- save_path = output_dir / "best_model"
634
- else:
635
- save_path = output_dir / f"checkpoint-epoch-{epoch}"
636
-
637
- # Salvar modelo
638
- self.model.save_model(str(save_path))
639
-
640
- # Salvar estado do treinamento
641
- training_state = {
642
- "epoch": epoch,
643
- "optimizer_state_dict": self.optimizer.state_dict(),
644
- "scheduler_state_dict": self.scheduler.state_dict(),
645
- "best_metric": self.best_metric,
646
- "training_history": self.training_history
647
- }
648
-
649
- torch.save(training_state, save_path / "training_state.pt")
650
-
651
- logger.info(f"✅ Checkpoint salvo em {save_path}")
652
-
653
- def _finalize_training(self):
654
- """Finalizar treinamento"""
655
-
656
- # Salvar histórico de treinamento
657
- output_dir = Path(self.config.output_dir)
658
-
659
- with open(output_dir / "training_history.json", "w") as f:
660
- json_utils.dump(self.training_history, f, indent=2)
661
-
662
- # Plotar curvas de treinamento
663
- self._plot_training_curves()
664
-
665
- if self.config.use_wandb:
666
- wandb.finish()
667
-
668
- logger.info("🎉 Treinamento finalizado com sucesso!")
669
-
670
- def _plot_training_curves(self):
671
- """Plotar curvas de treinamento"""
672
-
673
- fig, axes = plt.subplots(2, 2, figsize=(15, 10))
674
-
675
- # Loss de treinamento
676
- epochs = range(1, len(self.training_history["train_loss"]) + 1)
677
- axes[0, 0].plot(epochs, self.training_history["train_loss"])
678
- axes[0, 0].set_title("Loss de Treinamento")
679
- axes[0, 0].set_xlabel("Época")
680
- axes[0, 0].set_ylabel("Loss")
681
-
682
- # Métricas de avaliação
683
- if self.training_history["eval_metrics"]:
684
- eval_epochs = range(1, len(self.training_history["eval_metrics"]) + 1)
685
-
686
- # F1 Score
687
- f1_scores = [m.get("eval_f1", 0) for m in self.training_history["eval_metrics"]]
688
- axes[0, 1].plot(eval_epochs, f1_scores, 'g-')
689
- axes[0, 1].set_title("F1 Score")
690
- axes[0, 1].set_xlabel("Época")
691
- axes[0, 1].set_ylabel("F1")
692
-
693
- # Accuracy
694
- accuracy_scores = [m.get("eval_anomaly_accuracy", 0) for m in self.training_history["eval_metrics"]]
695
- axes[1, 0].plot(eval_epochs, accuracy_scores, 'b-')
696
- axes[1, 0].set_title("Accuracy")
697
- axes[1, 0].set_xlabel("Época")
698
- axes[1, 0].set_ylabel("Accuracy")
699
-
700
- # Loss de avaliação
701
- eval_losses = [m.get("eval_loss", 0) for m in self.training_history["eval_metrics"]]
702
- axes[1, 1].plot(eval_epochs, eval_losses, 'r-')
703
- axes[1, 1].set_title("Loss de Avaliação")
704
- axes[1, 1].set_xlabel("Época")
705
- axes[1, 1].set_ylabel("Loss")
706
-
707
- plt.tight_layout()
708
-
709
- # Salvar plot
710
- output_dir = Path(self.config.output_dir)
711
- plt.savefig(output_dir / "training_curves.png", dpi=300, bbox_inches='tight')
712
- plt.close()
713
-
714
-
715
- def create_training_pipeline(
716
- data_path: str,
717
- config: Optional[TrainingConfig] = None
718
- ) -> Tuple[CidadaoAIForTransparency, CidadaoTrainer]:
719
- """
720
- Criar pipeline de treinamento completo
721
-
722
- Args:
723
- data_path: Caminho para dados de treinamento
724
- config: Configuração de treinamento
725
-
726
- Returns:
727
- Tuple com modelo e trainer
728
- """
729
-
730
- if config is None:
731
- config = TrainingConfig()
732
-
733
- logger.info("🏗️ Criando pipeline de treinamento Cidadão.AI")
734
-
735
- # Criar modelo
736
- model = create_cidadao_model(
737
- specialized_tasks=config.specialized_tasks,
738
- model_size=config.model_size
739
- )
740
-
741
- # Criar tokenizer
742
- tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")
743
- tokenizer.pad_token = tokenizer.eos_token
744
-
745
- # Redimensionar embeddings se necessário
746
- model.model.model.resize_token_embeddings(len(tokenizer))
747
-
748
- # Criar trainer
749
- trainer = CidadaoTrainer(model, tokenizer, config)
750
-
751
- logger.info(f"✅ Pipeline criado - Modelo: {config.model_size}, Tarefas: {config.specialized_tasks}")
752
-
753
- return model, trainer
754
 
755
 
756
- def prepare_transparency_data(data_path: str, output_dir: str = "./data/processed"):
757
- """
758
- Preparar dados de transparência para treinamento
759
-
760
- Esta função seria expandida para processar dados reais do Portal da Transparência
761
- """
762
-
763
- logger.info("📊 Preparando dados de transparência")
764
-
765
- output_dir = Path(output_dir)
766
- output_dir.mkdir(parents=True, exist_ok=True)
767
-
768
- # Aqui você implementaria:
769
- # 1. Conexão com Portal da Transparência API
770
- # 2. Extração e limpeza de dados
771
- # 3. Anotação de anomalias (semi-supervisionado)
772
- # 4. Balanceamento de classes
773
- # 5. Divisão train/val/test
774
-
775
- # Por enquanto, criar dados sintéticos
776
- logger.info("⚠️ Usando dados sintéticos para demonstração")
777
-
778
- # Implementação completa seria conectada aos dados reais
779
- sample_data = {
780
- "train": output_dir / "train.json",
781
- "val": output_dir / "val.json",
782
- "test": output_dir / "test.json"
783
- }
784
-
785
- return sample_data
786
 
787
 
788
- if __name__ == "__main__":
789
- # Exemplo de uso
790
-
791
- # Configurar logging
792
- logging.basicConfig(level=logging.INFO)
793
-
794
- # Configuração de treinamento
795
- config = TrainingConfig(
796
- experiment_name="cidadao-gpt-transparency-v1",
797
- num_epochs=5,
798
- batch_size=4, # Reduzido para teste
799
- learning_rate=2e-5,
800
- use_wandb=False, # Desabilitar para teste
801
- output_dir="./models/cidadao-gpt-test"
802
- )
803
-
804
- # Criar pipeline
805
- model, trainer = create_training_pipeline(
806
- data_path="./data/transparency_data.json",
807
- config=config
808
- )
809
-
810
- print("🤖 Cidadão.AI Training Pipeline criado com sucesso!")
811
- print(f"📊 Modelo: {config.model_size}")
812
- print(f"🎯 Tarefas especializadas: {config.specialized_tasks}")
813
- print(f"💾 Diretório de saída: {config.output_dir}")
 
1
  """
2
+ ML Training Pipeline for Cidadão.AI
3
 
4
+ This module provides a comprehensive training pipeline for ML models
5
+ used in anomaly detection, fraud detection, and pattern recognition.
6
  """
7
 
8
+ import asyncio
9
+ import json
10
  import os
11
+ from datetime import datetime
12
+ from typing import Dict, Any, List, Optional, Tuple, Union
 
13
  from pathlib import Path
14
+ import pickle
15
+ import joblib
16
+ import numpy as np
17
+ from sklearn.model_selection import train_test_split, cross_val_score
18
+ from sklearn.metrics import (
19
+ accuracy_score, precision_score, recall_score, f1_score,
20
+ roc_auc_score, confusion_matrix, classification_report
21
+ )
22
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
23
+ from sklearn.ensemble import IsolationForest, RandomForestClassifier
24
+ from sklearn.svm import OneClassSVM
25
+ from sklearn.neighbors import LocalOutlierFactor
26
+ import mlflow
27
+ import mlflow.sklearn
28
+ from mlflow.tracking import MlflowClient
29
 
30
+ from src.core import get_logger, settings
31
+ from src.core.exceptions import CidadaoAIError
32
+ from src.infrastructure.cache.redis_client import get_redis_client
33
+ from src.models.ml_models import AnomalyDetectorModel
34
 
35
 
36
+ logger = get_logger(__name__)
 
37
 
38
 
39
+ class MLTrainingPipeline:
40
+ """
41
+ Comprehensive ML training pipeline with versioning and tracking.
42
+
43
+ Features:
44
+ - Multiple algorithm support
45
+ - Automatic hyperparameter tuning
46
+ - Model versioning with MLflow
47
+ - Performance tracking
48
+ - A/B testing support
49
+ """
50
 
51
+ def __init__(self, experiment_name: str = "cidadao_ai_models"):
52
+ """Initialize the training pipeline."""
53
+ self.experiment_name = experiment_name
54
+ self.mlflow_client = None
55
+ self.models_dir = Path(settings.get("ML_MODELS_DIR", "./models"))
56
+ self.models_dir.mkdir(exist_ok=True)
57
+
58
+ # Supported algorithms
59
+ self.algorithms = {
60
+ "isolation_forest": IsolationForest,
61
+ "one_class_svm": OneClassSVM,
62
+ "random_forest": RandomForestClassifier,
63
+ "local_outlier_factor": LocalOutlierFactor
64
+ }
 
65
 
66
+ # Model registry
67
+ self.model_registry = {}
68
+ self._initialize_mlflow()
69
+
70
+ def _initialize_mlflow(self):
71
+ """Initialize MLflow tracking."""
72
+ try:
73
+ mlflow.set_tracking_uri(settings.get("MLFLOW_TRACKING_URI", "file:./mlruns"))
74
+ mlflow.set_experiment(self.experiment_name)
75
+ self.mlflow_client = MlflowClient()
76
+ logger.info(f"MLflow initialized with experiment: {self.experiment_name}")
77
+ except Exception as e:
78
+ logger.warning(f"MLflow initialization failed: {e}. Using local tracking.")
79
+
80
+ async def train_model(
81
+ self,
82
+ model_type: str,
83
+ algorithm: str,
84
+ X_train: np.ndarray,
85
+ y_train: Optional[np.ndarray] = None,
86
+ hyperparameters: Optional[Dict[str, Any]] = None,
87
+ metadata: Optional[Dict[str, Any]] = None
88
+ ) -> Dict[str, Any]:
89
+ """
90
+ Train a model with the specified algorithm.
91
+
92
+ Args:
93
+ model_type: Type of model (anomaly, fraud, pattern)
94
+ algorithm: Algorithm to use
95
+ X_train: Training features
96
+ y_train: Training labels (optional for unsupervised)
97
+ hyperparameters: Model hyperparameters
98
+ metadata: Additional metadata
99
 
100
+ Returns:
101
+ Training results with model info
102
+ """
103
+ try:
104
+ logger.info(f"Starting training for {model_type} with {algorithm}")
105
+
106
+ # Start MLflow run
107
+ with mlflow.start_run(run_name=f"{model_type}_{algorithm}_{datetime.now().isoformat()}"):
108
+ # Log parameters
109
+ mlflow.log_param("model_type", model_type)
110
+ mlflow.log_param("algorithm", algorithm)
111
+ mlflow.log_param("n_samples", X_train.shape[0])
112
+ mlflow.log_param("n_features", X_train.shape[1])
 
113
 
114
+ if hyperparameters:
115
+ for key, value in hyperparameters.items():
116
+ mlflow.log_param(f"param_{key}", value)
117
 
118
+ # Create and train model
119
+ model = await self._create_model(algorithm, hyperparameters)
120
+
121
+ # Train based on supervised/unsupervised
122
+ if y_train is not None:
123
+ # Supervised training
124
+ X_tr, X_val, y_tr, y_val = train_test_split(
125
+ X_train, y_train, test_size=0.2, random_state=42
126
+ )
127
+
128
+ model.fit(X_tr, y_tr)
129
+
130
+ # Evaluate
131
+ y_pred = model.predict(X_val)
132
+ metrics = self._calculate_metrics(y_val, y_pred)
133
+
134
+ # Cross-validation
135
+ cv_scores = cross_val_score(model, X_train, y_train, cv=5)
136
+ metrics["cv_score_mean"] = cv_scores.mean()
137
+ metrics["cv_score_std"] = cv_scores.std()
138
+
139
+ else:
140
+ # Unsupervised training
141
+ model.fit(X_train)
142
+
143
+ # Evaluate with anomaly scores
144
+ if hasattr(model, 'score_samples'):
145
+ anomaly_scores = model.score_samples(X_train)
146
+ metrics = {
147
+ "anomaly_score_mean": float(np.mean(anomaly_scores)),
148
+ "anomaly_score_std": float(np.std(anomaly_scores)),
149
+ "anomaly_score_min": float(np.min(anomaly_scores)),
150
+ "anomaly_score_max": float(np.max(anomaly_scores))
151
+ }
152
+ else:
153
+ metrics = {"training_complete": True}
154
+
155
+ # Log metrics
156
+ for metric_name, metric_value in metrics.items():
157
+ mlflow.log_metric(metric_name, metric_value)
158
+
159
+ # Save model
160
+ model_path = await self._save_model(
161
+ model, model_type, algorithm, metrics, metadata
162
+ )
163
+
164
+ # Log model to MLflow
165
+ mlflow.sklearn.log_model(
166
+ model,
167
+ f"{model_type}_{algorithm}",
168
+ registered_model_name=f"{model_type}_{algorithm}_model"
169
+ )
170
+
171
+ # Create model version
172
+ version = await self._create_model_version(
173
+ model_type, algorithm, model_path, metrics
174
+ )
175
+
176
+ return {
177
+ "success": True,
178
+ "model_id": version["model_id"],
179
+ "version": version["version"],
180
+ "metrics": metrics,
181
+ "model_path": model_path,
182
+ "run_id": mlflow.active_run().info.run_id
183
+ }
184
+
185
+ except Exception as e:
186
+ logger.error(f"Training failed: {str(e)}")
187
+ return {
188
+ "success": False,
189
+ "error": str(e),
190
+ "model_id": None
191
+ }
192
 
193
+ async def _create_model(
194
+ self,
195
+ algorithm: str,
196
+ hyperparameters: Optional[Dict[str, Any]] = None
197
+ ) -> Any:
198
+ """Create a model instance with hyperparameters."""
199
+ if algorithm not in self.algorithms:
200
+ raise ValueError(f"Unsupported algorithm: {algorithm}")
201
+
202
+ model_class = self.algorithms[algorithm]
203
+
204
+ # Default hyperparameters
205
+ default_params = {
206
+ "isolation_forest": {
207
+ "contamination": 0.1,
208
+ "random_state": 42,
209
+ "n_estimators": 100
210
+ },
211
+ "one_class_svm": {
212
+ "gamma": 0.001,
213
+ "nu": 0.05,
214
+ "kernel": "rbf"
215
+ },
216
+ "random_forest": {
217
+ "n_estimators": 100,
218
+ "random_state": 42,
219
+ "max_depth": 10
220
+ },
221
+ "local_outlier_factor": {
222
+ "contamination": 0.1,
223
+ "n_neighbors": 20
224
+ }
225
+ }
226
 
227
+ # Merge with provided hyperparameters
228
+ params = default_params.get(algorithm, {})
229
+ if hyperparameters:
230
+ params.update(hyperparameters)
 
 
 
231
 
232
+ return model_class(**params)
233
 
234
+ def _calculate_metrics(
235
+ self,
236
+ y_true: np.ndarray,
237
+ y_pred: np.ndarray,
238
+ y_proba: Optional[np.ndarray] = None
239
+ ) -> Dict[str, float]:
240
+ """Calculate comprehensive metrics for model evaluation."""
241
+ metrics = {
242
+ "accuracy": float(accuracy_score(y_true, y_pred)),
243
+ "precision": float(precision_score(y_true, y_pred, average='weighted')),
244
+ "recall": float(recall_score(y_true, y_pred, average='weighted')),
245
+ "f1_score": float(f1_score(y_true, y_pred, average='weighted'))
 
246
  }
247
 
248
+ # Add ROC-AUC if probabilities available
249
+ if y_proba is not None and len(np.unique(y_true)) == 2:
250
+ metrics["roc_auc"] = float(roc_auc_score(y_true, y_proba[:, 1]))
 
251
 
252
+ return metrics
253
+
254
+ async def _save_model(
255
+ self,
256
+ model: Any,
257
+ model_type: str,
258
+ algorithm: str,
259
+ metrics: Dict[str, Any],
260
+ metadata: Optional[Dict[str, Any]] = None
261
+ ) -> str:
262
+ """Save trained model to disk."""
263
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
264
+ model_filename = f"{model_type}_{algorithm}_{timestamp}.pkl"
265
+ model_path = self.models_dir / model_filename
266
+
267
+ # Create model package
268
+ model_package = {
269
+ "model": model,
270
+ "model_type": model_type,
271
+ "algorithm": algorithm,
272
+ "metrics": metrics,
273
+ "metadata": metadata or {},
274
+ "created_at": datetime.now().isoformat(),
275
+ "version": timestamp
276
  }
277
 
278
+ # Save with joblib for better compression
279
+ joblib.dump(model_package, model_path)
280
+ logger.info(f"Model saved to: {model_path}")
 
281
 
282
+ return str(model_path)
 
283
 
284
+ async def _create_model_version(
285
  self,
286
+ model_type: str,
287
+ algorithm: str,
288
+ model_path: str,
289
+ metrics: Dict[str, Any]
290
+ ) -> Dict[str, Any]:
291
+ """Create a versioned model entry in the registry."""
292
+ model_id = f"{model_type}_{algorithm}"
293
+
294
+ # Get or create model entry
295
+ if model_id not in self.model_registry:
296
+ self.model_registry[model_id] = {
297
+ "versions": [],
298
+ "current_version": None,
299
+ "created_at": datetime.now().isoformat()
300
+ }
301
 
302
+ # Add new version
303
+ version = {
304
+ "version": len(self.model_registry[model_id]["versions"]) + 1,
305
+ "path": model_path,
306
+ "metrics": metrics,
307
+ "created_at": datetime.now().isoformat(),
308
+ "status": "staging" # staging, production, archived
309
+ }
310
 
311
+ self.model_registry[model_id]["versions"].append(version)
 
312
 
313
+ # Save registry to Redis
314
+ redis_client = await get_redis_client()
315
+ await redis_client.set(
316
+ f"ml_model_registry:{model_id}",
317
+ json.dumps(self.model_registry[model_id]),
318
+ ex=86400 * 30 # 30 days
319
+ )
320
 
321
+ return {
322
+ "model_id": model_id,
323
+ "version": version["version"]
 
 
324
  }
 
325
 
326
+ async def load_model(
327
  self,
328
+ model_id: str,
329
+ version: Optional[int] = None
330
+ ) -> Tuple[Any, Dict[str, Any]]:
331
+ """
332
+ Load a model from the registry.
333
+
334
+ Args:
335
+ model_id: Model identifier
336
+ version: Specific version (latest if None)
337
+
338
+ Returns:
339
+ Tuple of (model, metadata)
340
+ """
341
+ # Try to load from Redis first
342
+ redis_client = await get_redis_client()
343
+ registry_data = await redis_client.get(f"ml_model_registry:{model_id}")
344
+
345
+ if registry_data:
346
+ registry = json.loads(registry_data)
347
+ elif model_id in self.model_registry:
348
+ registry = self.model_registry[model_id]
349
+ else:
350
+ raise ValueError(f"Model {model_id} not found in registry")
351
+
352
+ # Get version
353
+ if version is None:
354
+ # Get latest production version or latest version
355
+ prod_versions = [
356
+ v for v in registry["versions"]
357
+ if v.get("status") == "production"
358
+ ]
359
+
360
+ if prod_versions:
361
+ version_data = max(prod_versions, key=lambda v: v["version"])
362
+ else:
363
+ version_data = max(registry["versions"], key=lambda v: v["version"])
364
+ else:
365
+ version_data = next(
366
+ (v for v in registry["versions"] if v["version"] == version),
367
+ None
368
  )
369
+
370
+ if not version_data:
371
+ raise ValueError(f"Version {version} not found for model {model_id}")
372
 
373
+ # Load model
374
+ model_package = joblib.load(version_data["path"])
 
375
 
376
+ return model_package["model"], model_package
377
+
378
+ async def promote_model(
379
+ self,
380
+ model_id: str,
381
+ version: int,
382
+ status: str = "production"
383
+ ) -> bool:
384
+ """
385
+ Promote a model version to production.
386
+
387
+ Args:
388
+ model_id: Model identifier
389
+ version: Version to promote
390
+ status: New status (production, staging, archived)
391
+ """
392
+ try:
393
+ # Load registry
394
+ redis_client = await get_redis_client()
395
+ registry_data = await redis_client.get(f"ml_model_registry:{model_id}")
396
 
397
+ if registry_data:
398
+ registry = json.loads(registry_data)
399
+ else:
400
+ registry = self.model_registry.get(model_id)
401
 
402
+ if not registry:
403
+ raise ValueError(f"Model {model_id} not found")
404
+
405
+ # Update version status
406
+ for v in registry["versions"]:
407
+ if v["version"] == version:
408
+ # Archive current production if promoting to production
409
+ if status == "production":
410
+ for other_v in registry["versions"]:
411
+ if other_v.get("status") == "production":
412
+ other_v["status"] = "archived"
413
 
414
+ v["status"] = status
415
+ v["promoted_at"] = datetime.now().isoformat()
416
  break
417
 
418
+ # Save updated registry
419
+ self.model_registry[model_id] = registry
420
+ await redis_client.set(
421
+ f"ml_model_registry:{model_id}",
422
+ json.dumps(registry),
423
+ ex=86400 * 30
 
424
  )
425
 
426
+ logger.info(f"Promoted {model_id} v{version} to {status}")
427
+ return True
428
 
429
+ except Exception as e:
430
+ logger.error(f"Failed to promote model: {e}")
431
+ return False
 
 
 
432
 
433
+ async def get_model_metrics(
434
+ self,
435
+ model_id: str,
436
+ version: Optional[int] = None
437
+ ) -> Dict[str, Any]:
438
+ """Get metrics for a specific model version."""
439
+ _, metadata = await self.load_model(model_id, version)
440
+ return metadata.get("metrics", {})
441
+
442
+ async def compare_models(
443
+ self,
444
+ model_ids: List[Tuple[str, Optional[int]]],
445
+ test_data: np.ndarray,
446
+ test_labels: Optional[np.ndarray] = None
447
+ ) -> Dict[str, Any]:
448
+ """
449
+ Compare multiple models on the same test data.
450
+
451
+ Args:
452
+ model_ids: List of (model_id, version) tuples
453
+ test_data: Test features
454
+ test_labels: Test labels (if available)
455
 
456
+ Returns:
457
+ Comparison results
458
+ """
459
+ results = {}
460
+
461
+ for model_id, version in model_ids:
462
+ try:
463
+ model, metadata = await self.load_model(model_id, version)
464
 
465
+ # Make predictions
466
+ predictions = model.predict(test_data)
 
467
 
468
+ result = {
469
+ "model_id": model_id,
470
+ "version": version or "latest",
471
+ "algorithm": metadata.get("algorithm"),
472
+ "training_metrics": metadata.get("metrics", {})
473
+ }
 
474
 
475
+ # Calculate test metrics if labels available
476
+ if test_labels is not None:
477
+ test_metrics = self._calculate_metrics(test_labels, predictions)
478
+ result["test_metrics"] = test_metrics
 
479
 
480
+ # Add anomaly scores for unsupervised models
481
+ if hasattr(model, 'score_samples'):
482
+ scores = model.score_samples(test_data)
483
+ result["anomaly_scores"] = {
484
+ "mean": float(np.mean(scores)),
485
+ "std": float(np.std(scores)),
486
+ "percentiles": {
487
+ "10": float(np.percentile(scores, 10)),
488
+ "50": float(np.percentile(scores, 50)),
489
+ "90": float(np.percentile(scores, 90))
490
+ }
491
+ }
492
 
493
+ results[f"{model_id}_v{version or 'latest'}"] = result
494
+
495
+ except Exception as e:
496
+ logger.error(f"Failed to evaluate {model_id}: {e}")
497
+ results[f"{model_id}_v{version or 'latest'}"] = {
498
+ "error": str(e)
499
+ }
500
+
501
+ return results
502
+
503
+ async def cleanup_old_models(self, days: int = 30) -> int:
504
+ """Remove models older than specified days."""
505
+ count = 0
506
+ cutoff_date = datetime.now().timestamp() - (days * 86400)
507
+
508
+ for model_file in self.models_dir.glob("*.pkl"):
509
+ if model_file.stat().st_mtime < cutoff_date:
510
+ model_file.unlink()
511
+ count += 1
512
+ logger.info(f"Removed old model: {model_file}")
513
+
514
+ return count
 
515
 
516
 
517
+ # Global training pipeline instance
518
+ training_pipeline = MLTrainingPipeline()
 
519
 
520
 
521
+ async def get_training_pipeline() -> MLTrainingPipeline:
522
+ """Get the global training pipeline instance."""
523
+ return training_pipeline
 
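For reference, a minimal usage sketch of the training pipeline added above. It assumes the module is importable as src.ml.training_pipeline (the import path used by the unit tests below) and that the Redis instance and MLflow tracking backend the pipeline talks to are reachable; the sample data, feature count, and hyperparameter values are illustrative placeholders, not values from this commit:

import asyncio

import numpy as np

from src.ml.training_pipeline import get_training_pipeline


async def demo() -> None:
    pipeline = await get_training_pipeline()

    # Illustrative data: 500 samples with 12 features (random noise for the sketch).
    X = np.random.randn(500, 12)

    # Train an unsupervised anomaly detector with Isolation Forest.
    result = await pipeline.train_model(
        model_type="anomaly",
        algorithm="isolation_forest",
        X_train=X,
        hyperparameters={"contamination": 0.05},
    )

    if result["success"]:
        # Promote the new version to production in the registry.
        await pipeline.promote_model(result["model_id"], result["version"], "production")

        # Load the current production version back and score the same data.
        model, metadata = await pipeline.load_model(result["model_id"])
        print(metadata["metrics"], model.score_samples(X).mean())


asyncio.run(demo())

The call signatures (train_model, promote_model, load_model, get_training_pipeline) are taken directly from the file in this diff; only the surrounding script is invented.
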
tests/unit/ml/__init__.py ADDED
File without changes
tests/unit/ml/test_training_pipeline.py ADDED
@@ -0,0 +1,369 @@
1
+ """
2
+ Unit tests for ML Training Pipeline
3
+ """
4
+
5
+ import pytest
6
+ import asyncio
7
+ import numpy as np
8
+ from datetime import datetime
9
+ from unittest.mock import AsyncMock, MagicMock, patch
10
+ import json
11
+
12
+ from src.ml.training_pipeline import MLTrainingPipeline, training_pipeline
13
+ from src.ml.ab_testing import ABTestFramework, ABTestStatus, TrafficAllocationStrategy
14
+
15
+
16
+ class TestMLTrainingPipeline:
17
+ """Test suite for ML training pipeline."""
18
+
19
+ @pytest.fixture
20
+ def pipeline(self):
21
+ """Create a test pipeline instance."""
22
+ return MLTrainingPipeline(experiment_name="test_experiment")
23
+
24
+ @pytest.fixture
25
+ def sample_data(self):
26
+ """Generate sample training data."""
27
+ X_train = np.random.randn(100, 10)
28
+ y_train = np.random.choice([0, 1], size=100)
29
+ return X_train, y_train
30
+
31
+ @pytest.mark.asyncio
32
+ async def test_pipeline_initialization(self, pipeline):
33
+ """Test pipeline initialization."""
34
+ assert pipeline.experiment_name == "test_experiment"
35
+ assert pipeline.models_dir.exists()
36
+ assert len(pipeline.algorithms) > 0
37
+ assert "isolation_forest" in pipeline.algorithms
38
+
39
+ @pytest.mark.asyncio
40
+ async def test_train_unsupervised_model(self, pipeline, sample_data):
41
+ """Test training an unsupervised model."""
42
+ X_train, _ = sample_data
43
+
44
+ with patch('mlflow.start_run'), \
45
+ patch('mlflow.log_param'), \
46
+ patch('mlflow.log_metric'), \
47
+ patch('mlflow.sklearn.log_model'):
48
+
49
+ result = await pipeline.train_model(
50
+ model_type="anomaly",
51
+ algorithm="isolation_forest",
52
+ X_train=X_train,
53
+ hyperparameters={"contamination": 0.1}
54
+ )
55
+
56
+ assert result["success"] is True
57
+ assert result["model_id"] == "anomaly_isolation_forest"
58
+ assert result["version"] == 1
59
+ assert "metrics" in result
60
+ assert "model_path" in result
61
+
62
+ @pytest.mark.asyncio
63
+ async def test_train_supervised_model(self, pipeline, sample_data):
64
+ """Test training a supervised model."""
65
+ X_train, y_train = sample_data
66
+
67
+ with patch('mlflow.start_run'), \
68
+ patch('mlflow.log_param'), \
69
+ patch('mlflow.log_metric'), \
70
+ patch('mlflow.sklearn.log_model'):
71
+
72
+ result = await pipeline.train_model(
73
+ model_type="fraud",
74
+ algorithm="random_forest",
75
+ X_train=X_train,
76
+ y_train=y_train,
77
+ hyperparameters={"n_estimators": 50}
78
+ )
79
+
80
+ assert result["success"] is True
81
+ assert result["model_id"] == "fraud_random_forest"
82
+ assert "accuracy" in result["metrics"]
83
+ assert "precision" in result["metrics"]
84
+ assert "recall" in result["metrics"]
85
+ assert "f1_score" in result["metrics"]
86
+
87
+ @pytest.mark.asyncio
88
+ async def test_model_versioning(self, pipeline, sample_data):
89
+ """Test model versioning system."""
90
+ X_train, _ = sample_data
91
+
92
+ with patch('mlflow.start_run'), \
93
+ patch('mlflow.log_param'), \
94
+ patch('mlflow.log_metric'), \
95
+ patch('mlflow.sklearn.log_model'), \
96
+ patch.object(pipeline, '_save_model') as mock_save:
97
+
98
+ # Mock save to return a path
99
+ mock_save.return_value = "/models/test_model.pkl"
100
+
101
+ # Train first version
102
+ result1 = await pipeline.train_model(
103
+ model_type="anomaly",
104
+ algorithm="isolation_forest",
105
+ X_train=X_train
106
+ )
107
+
108
+ # Train second version
109
+ result2 = await pipeline.train_model(
110
+ model_type="anomaly",
111
+ algorithm="isolation_forest",
112
+ X_train=X_train
113
+ )
114
+
115
+ assert result1["version"] == 1
116
+ assert result2["version"] == 2
117
+ assert len(pipeline.model_registry["anomaly_isolation_forest"]["versions"]) == 2
118
+
119
+ @pytest.mark.asyncio
120
+ async def test_load_model(self, pipeline, sample_data):
121
+ """Test loading a model from registry."""
122
+ X_train, _ = sample_data
123
+
124
+ # Create a mock model
125
+ mock_model = MagicMock()
126
+ model_package = {
127
+ "model": mock_model,
128
+ "model_type": "anomaly",
129
+ "algorithm": "isolation_forest",
130
+ "metrics": {"score": 0.95},
131
+ "created_at": datetime.now().isoformat()
132
+ }
133
+
134
+ with patch('joblib.load', return_value=model_package), \
135
+ patch.object(pipeline, 'model_registry', {
136
+ "anomaly_isolation_forest": {
137
+ "versions": [{
138
+ "version": 1,
139
+ "path": "/models/test.pkl",
140
+ "status": "production"
141
+ }]
142
+ }
143
+ }):
144
+
145
+ model, metadata = await pipeline.load_model("anomaly_isolation_forest")
146
+
147
+ assert model == mock_model
148
+ assert metadata["model_type"] == "anomaly"
149
+ assert metadata["algorithm"] == "isolation_forest"
150
+
151
+ @pytest.mark.asyncio
152
+ async def test_promote_model(self, pipeline):
153
+ """Test promoting a model to production."""
154
+ with patch('src.infrastructure.cache.redis_client.get_redis_client') as mock_redis:
155
+ mock_redis_client = AsyncMock()
156
+ mock_redis_client.get.return_value = json.dumps({
157
+ "versions": [
158
+ {"version": 1, "status": "staging"},
159
+ {"version": 2, "status": "staging"}
160
+ ]
161
+ })
162
+ mock_redis_client.set = AsyncMock()
163
+ mock_redis.return_value = mock_redis_client
164
+
165
+ success = await pipeline.promote_model("test_model", 2, "production")
166
+
167
+ assert success is True
168
+ mock_redis_client.set.assert_called_once()
169
+
170
+ @pytest.mark.asyncio
171
+ async def test_compare_models(self, pipeline):
172
+ """Test comparing multiple models."""
173
+ test_data = np.random.randn(50, 10)
174
+ test_labels = np.random.choice([0, 1], size=50)
175
+
176
+ # Mock models
177
+ mock_model1 = MagicMock()
178
+ mock_model1.predict.return_value = np.ones(50)
179
+ mock_model1.score_samples = MagicMock(return_value=np.random.randn(50))
180
+
181
+ mock_model2 = MagicMock()
182
+ mock_model2.predict.return_value = np.zeros(50)
183
+
184
+ with patch.object(pipeline, 'load_model') as mock_load:
185
+ mock_load.side_effect = [
186
+ (mock_model1, {"algorithm": "isolation_forest", "metrics": {}}),
187
+ (mock_model2, {"algorithm": "random_forest", "metrics": {}})
188
+ ]
189
+
190
+ results = await pipeline.compare_models(
191
+ [("model1", 1), ("model2", 2)],
192
+ test_data,
193
+ test_labels
194
+ )
195
+
196
+ assert "model1_v1" in results
197
+ assert "model2_v2" in results
198
+ assert "test_metrics" in results["model1_v1"]
199
+ assert "anomaly_scores" in results["model1_v1"]
200
+
201
+
202
+ class TestABTestingFramework:
203
+ """Test suite for A/B testing framework."""
204
+
205
+ @pytest.fixture
206
+ def ab_framework(self):
207
+ """Create a test A/B testing framework."""
208
+ return ABTestFramework()
209
+
210
+ @pytest.mark.asyncio
211
+ async def test_create_ab_test(self, ab_framework):
212
+ """Test creating an A/B test."""
213
+ with patch.object(training_pipeline, 'load_model') as mock_load, \
214
+ patch('src.infrastructure.cache.redis_client.get_redis_client') as mock_redis:
215
+
216
+ # Mock model loading
217
+ mock_load.return_value = (MagicMock(), {})
218
+
219
+ # Mock Redis
220
+ mock_redis_client = AsyncMock()
221
+ mock_redis_client.set = AsyncMock()
222
+ mock_redis.return_value = mock_redis_client
223
+
224
+ test_config = await ab_framework.create_test(
225
+ test_name="test_ab",
226
+ model_a=("model1", 1),
227
+ model_b=("model2", 1),
228
+ traffic_split=(0.6, 0.4),
229
+ success_metric="accuracy"
230
+ )
231
+
232
+ assert test_config["test_name"] == "test_ab"
233
+ assert test_config["traffic_split"] == (0.6, 0.4)
234
+ assert test_config["status"] == ABTestStatus.DRAFT.value
235
+ assert test_config["model_a"]["model_id"] == "model1"
236
+ assert test_config["model_b"]["model_id"] == "model2"
237
+
238
+ @pytest.mark.asyncio
239
+ async def test_start_ab_test(self, ab_framework):
240
+ """Test starting an A/B test."""
241
+ # Create test first
242
+ test_config = {
243
+ "test_name": "test_ab",
244
+ "status": ABTestStatus.DRAFT.value,
245
+ "start_time": None
246
+ }
247
+ ab_framework.active_tests["test_ab"] = test_config
248
+
249
+ with patch('src.infrastructure.cache.redis_client.get_redis_client') as mock_redis:
250
+ mock_redis_client = AsyncMock()
251
+ mock_redis_client.set = AsyncMock()
252
+ mock_redis.return_value = mock_redis_client
253
+
254
+ success = await ab_framework.start_test("test_ab")
255
+
256
+ assert success is True
257
+ assert test_config["status"] == ABTestStatus.RUNNING.value
258
+ assert test_config["start_time"] is not None
259
+
260
+ @pytest.mark.asyncio
261
+ async def test_allocate_model_random(self, ab_framework):
262
+ """Test random model allocation."""
263
+ test_config = {
264
+ "test_name": "test_ab",
265
+ "status": ABTestStatus.RUNNING.value,
266
+ "allocation_strategy": TrafficAllocationStrategy.RANDOM.value,
267
+ "traffic_split": (0.5, 0.5),
268
+ "model_a": {"model_id": "model1", "version": 1},
269
+ "model_b": {"model_id": "model2", "version": 1}
270
+ }
271
+ ab_framework.active_tests["test_ab"] = test_config
272
+
273
+ # Test multiple allocations
274
+ allocations = []
275
+ for _ in range(100):
276
+ model_id, version = await ab_framework.allocate_model("test_ab")
277
+ allocations.append(model_id)
278
+
279
+ # Should have both models allocated
280
+ assert "model1" in allocations
281
+ assert "model2" in allocations
282
+
283
+ @pytest.mark.asyncio
284
+ async def test_record_prediction(self, ab_framework):
285
+ """Test recording prediction results."""
286
+ test_config = {
287
+ "test_name": "test_ab",
288
+ "status": ABTestStatus.RUNNING.value,
289
+ "allocation_strategy": TrafficAllocationStrategy.RANDOM.value,
290
+ "results": {
291
+ "model_a": {"predictions": 0, "successes": 0},
292
+ "model_b": {"predictions": 0, "successes": 0}
293
+ },
294
+ "minimum_sample_size": 10
295
+ }
296
+ ab_framework.active_tests["test_ab"] = test_config
297
+
298
+ with patch('src.infrastructure.cache.redis_client.get_redis_client') as mock_redis:
299
+ mock_redis_client = AsyncMock()
300
+ mock_redis_client.set = AsyncMock()
301
+ mock_redis.return_value = mock_redis_client
302
+
303
+ # Record some predictions
304
+ await ab_framework.record_prediction("test_ab", "model_a", True)
305
+ await ab_framework.record_prediction("test_ab", "model_a", False)
306
+ await ab_framework.record_prediction("test_ab", "model_b", True)
307
+
308
+ assert test_config["results"]["model_a"]["predictions"] == 2
309
+ assert test_config["results"]["model_a"]["successes"] == 1
310
+ assert test_config["results"]["model_b"]["predictions"] == 1
311
+ assert test_config["results"]["model_b"]["successes"] == 1
312
+
313
+ @pytest.mark.asyncio
314
+ async def test_analyze_test(self, ab_framework):
315
+ """Test analyzing A/B test results."""
316
+ test_config = {
317
+ "test_name": "test_ab",
318
+ "results": {
319
+ "model_a": {"predictions": 1000, "successes": 520},
320
+ "model_b": {"predictions": 1000, "successes": 480}
321
+ },
322
+ "significance_level": 0.05
323
+ }
324
+ ab_framework.active_tests["test_ab"] = test_config
325
+
326
+ with patch('src.infrastructure.cache.redis_client.get_redis_client') as mock_redis:
327
+ mock_redis_client = AsyncMock()
328
+ mock_redis_client.set = AsyncMock()
329
+ mock_redis.return_value = mock_redis_client
330
+
331
+ analysis = await ab_framework.analyze_test("test_ab")
332
+
333
+ assert "model_a" in analysis
334
+ assert "model_b" in analysis
335
+ assert "p_value" in analysis
336
+ assert "significant" in analysis
337
+ assert "lift" in analysis
338
+ assert analysis["model_a"]["conversion_rate"] == 0.52
339
+ assert analysis["model_b"]["conversion_rate"] == 0.48
340
+
341
+ @pytest.mark.asyncio
342
+ async def test_thompson_sampling_allocation(self, ab_framework):
343
+ """Test Thompson sampling allocation."""
344
+ test_config = {
345
+ "test_name": "test_ab",
346
+ "status": ABTestStatus.RUNNING.value,
347
+ "allocation_strategy": TrafficAllocationStrategy.THOMPSON_SAMPLING.value,
348
+ "thompson_params": {
349
+ "model_a": {"alpha": 10, "beta": 5},
350
+ "model_b": {"alpha": 5, "beta": 10}
351
+ },
352
+ "model_a": {"model_id": "model1", "version": 1},
353
+ "model_b": {"model_id": "model2", "version": 1}
354
+ }
355
+ ab_framework.active_tests["test_ab"] = test_config
356
+
357
+ # Test allocation - should favor model_a due to higher success rate
358
+ allocations = []
359
+ for _ in range(100):
360
+ model_id, _ = await ab_framework.allocate_model("test_ab")
361
+ allocations.append(model_id)
362
+
363
+ # Model 1 should be allocated more often
364
+ model1_count = allocations.count("model1")
365
+ model2_count = allocations.count("model2")
366
+
367
+ # Thompson sampling is probabilistic, but model1 should generally be favored
368
+ assert model1_count > 0
369
+ assert model2_count > 0
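
The A/B tests above exercise only the public surface of src.ml.ab_testing, whose implementation is not part of this excerpt, so the sketch below is inferred from those calls rather than from the framework's source: create_test, start_test, allocate_model, record_prediction, and analyze_test, with the argument shapes the tests use. The model identifiers and the feedback loop are placeholders, and a running Redis plus previously trained models in the registry are assumed:

import asyncio

from src.ml.ab_testing import ABTestFramework


async def run_ab_test() -> None:
    framework = ABTestFramework()

    # Register and start a test between two registered model versions.
    await framework.create_test(
        test_name="anomaly_if_vs_rf",
        model_a=("anomaly_isolation_forest", 1),
        model_b=("fraud_random_forest", 1),
        traffic_split=(0.5, 0.5),
        success_metric="accuracy",
    )
    await framework.start_test("anomaly_if_vs_rf")

    # Route traffic and feed back outcomes (placeholder feedback here).
    for _ in range(1000):
        model_id, _version = await framework.allocate_model("anomaly_if_vs_rf")
        variant = "model_a" if model_id == "anomaly_isolation_forest" else "model_b"
        await framework.record_prediction("anomaly_if_vs_rf", variant, True)

    # Inspect the statistical comparison once enough samples have accumulated.
    analysis = await framework.analyze_test("anomaly_if_vs_rf")
    print(analysis["p_value"], analysis["significant"], analysis["lift"])


asyncio.run(run_ab_test())

As a sanity check on the figures used in test_analyze_test (520/1000 vs 480/1000 successes): a standard pooled two-proportion z-test gives z of roughly 1.79 and a two-sided p-value of roughly 0.07, so that split would not be significant at the 0.05 level, consistent with the test asserting only that the p_value and significant fields are present. Whether ab_testing.py uses exactly this test is not shown in this excerpt. Running the suite with something like pytest tests/unit/ml/ -v should work, provided an asyncio plugin such as pytest-asyncio is configured for the @pytest.mark.asyncio markers.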