import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import json
import logging
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
import os
from train_model import HybridMusicRecommender, MusicRecommenderDataset

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Allow-list numpy types so torch.load can deserialize checkpoints that embed
# numpy scalars/arrays under torch's weights_only safety regime.
torch.serialization.add_safe_globals([
    np.generic,   # numpy scalar base type
    np.ndarray,   # numpy arrays
    np.dtype,     # numpy dtypes
    np.float64,   # specific scalar types seen in checkpoints
    np.float32,
    np.int64,
    np.int32,
])


class RecommendationGenerator:
    """Loads a trained HybridMusicRecommender checkpoint plus its label
    encoders and generates top-N song recommendations for a single user
    against a catalog DataFrame.

    Args:
        model_path: Path to a torch checkpoint containing at least
            'model_state_dict' and (optionally) 'config'.
        catalog_data: DataFrame of candidate songs; must carry the columns
            the training dataset used ('music_id', 'music', 'artist_name',
            'main_genre', ...).
        encoders_path: Path to a torch-saved dict of fitted LabelEncoders.
    """

    def __init__(self, model_path: str, catalog_data: pd.DataFrame, encoders_path: str):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.catalog_data = catalog_data

        # Load model checkpoint with safety settings
        logger.info(f"Loading model from {model_path}")
        try:
            self.checkpoint = torch.load(model_path, map_location=self.device)
            logger.info("Model loaded successfully")
        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            raise

        # Prefer the config stored in the checkpoint; fall back to the config
        # file, then to hard-coded defaults, so the model architecture can
        # always be reconstructed.
        self.config = self.checkpoint.get('config', {})
        if not self.config:
            try:
                with open('config/model_config.json', 'r') as f:
                    self.config = json.load(f)
            except FileNotFoundError:
                logger.warning("Config file not found, using default values")
                self.config = {
                    'embedding_dim': 64,
                    'hidden_layers': [256, 128, 64],
                    'dropout': 0.3
                }

        # Encoders were pickled with sklearn objects, so LabelEncoder must be
        # allow-listed and weights_only disabled.
        # NOTE(review): weights_only=False unpickles arbitrary objects — only
        # load encoder files from a trusted source.
        torch.serialization.add_safe_globals([LabelEncoder])
        self.encoders = torch.load(encoders_path, weights_only=False)

        # Print shape info for debugging
        logger.info("Encoder class counts:")
        for key, encoder in self.encoders.items():
            if isinstance(encoder, LabelEncoder):
                logger.info(f"{key}: {len(encoder.classes_)}")

        # Derive embedding-table sizes from the checkpoint itself so the
        # rebuilt model matches the saved weights exactly.
        state_dict = self.checkpoint['model_state_dict']
        self.embedding_dims = {
            'num_users': state_dict['user_embedding.weight'].shape[0],
            'num_music': state_dict['music_embedding.weight'].shape[0],
            'num_artists': state_dict['artist_embedding.weight'].shape[0],
            'num_genres': len(self.encoders['genre_encoder'].classes_),
            'num_numerical': 12
        }

        logger.info("Model dimensions from state dict:")
        for key, value in self.embedding_dims.items():
            logger.info(f"{key}: {value}")

        # Safety check: drop catalog items whose encoded music id would index
        # past the embedding table.
        # NOTE(review): this compares the *count* of unique ids against table
        # capacity, not the maximum encoded id — confirm that is the intended
        # condition.
        num_unique_music = self.catalog_data['music_id'].nunique()
        if num_unique_music >= self.embedding_dims['num_music']:
            logger.warning(f"Catalog contains music IDs larger than model capacity. Filtering out excess items.")
            valid_music_ids = set(self.encoders['music_encoder'].transform(
                self.encoders['music_encoder'].classes_[:self.embedding_dims['num_music']]
            ))
            self.catalog_data = self.catalog_data[
                self.catalog_data['music_id'].apply(
                    lambda x: self.encoders['music_encoder'].transform([x])[0] in valid_music_ids
                )
            ]
            logger.info(f"Filtered catalog size: {len(self.catalog_data)}")

        self.model = self._initialize_model(self.embedding_dims)

    def _initialize_model(self, embedding_dims):
        """Rebuild the network from checkpoint dimensions + config, load the
        saved weights, and return it in eval mode on the target device.

        Fix: hyperparameters now come from self.config (previously they were
        hard-coded and could disagree with the checkpoint, causing
        load_state_dict shape errors). Defaults mirror the old constants.
        """
        model = HybridMusicRecommender(
            num_users=embedding_dims['num_users'],
            num_music=embedding_dims['num_music'],
            num_artists=embedding_dims['num_artists'],
            num_genres=embedding_dims['num_genres'],
            num_numerical=embedding_dims['num_numerical'],
            embedding_dim=self.config.get('embedding_dim', 64),
            layers=self.config.get('hidden_layers', [256, 128, 64]),
            dropout=self.config.get('dropout', 0.2)
        )

        # Load state dict from checkpoint
        state_dict = self.checkpoint['model_state_dict']
        model.load_state_dict(state_dict)

        # Move model to device and set to eval mode
        model = model.to(self.device)
        model.eval()

        return model

    def generate_recommendations(self, user_info: dict, n_recommendations: int = 10) -> pd.DataFrame:
        """
        Generate music recommendations for a specific user.

        Args:
            user_info: Dictionary containing user information (age, gender, user_id)
            n_recommendations: Number of recommendations to generate

        Returns:
            DataFrame containing recommended songs with predicted play counts
        """
        # Score every catalog song for this user by broadcasting the user's
        # attributes across a copy of the catalog.
        user_candidates = self.catalog_data.copy()
        user_candidates['age'] = user_info['age']
        user_candidates['gender'] = user_info['gender']
        user_candidates['user_id'] = user_info['user_id']

        # Unknown users fall back to encoding 0 (cold-start handling).
        try:
            encoded_user = self.encoders['user_encoder'].transform([user_info['user_id']])[0]
            logger.info(f"User ID {user_info['user_id']} encoded as: {encoded_user}")
        except Exception as e:
            logger.warning(f"Error encoding user ID: {str(e)}")
            logger.warning("Using default encoding (0)")
            encoded_user = 0
            user_candidates['user_id'] = '0'  # Use default user ID

        # Debug catalog data
        print(f"\nCatalog Statistics:")
        print(f"Total songs: {len(user_candidates)}")
        print(f"Unique artists: {user_candidates['artist_name'].nunique()}")
        print(f"Unique genres: {user_candidates['main_genre'].nunique()}")

        try:
            # Create dataset with safety checks
            test_dataset = MusicRecommenderDataset(
                user_candidates,
                mode='test',
                encoders=self.encoders,
                embedding_dims=self.embedding_dims  # Pass embedding dimensions
            )
            test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

            # Generate predictions batch by batch, tracking the row index each
            # prediction corresponds to.
            predictions = []
            indices = []
            with torch.no_grad():
                for i, batch in enumerate(test_loader):
                    batch = {k: v.to(self.device) for k, v in batch.items()}
                    pred = self.model(batch)
                    predictions.extend(pred.cpu().numpy())
                    indices.extend(range(i * test_loader.batch_size,
                                         min((i + 1) * test_loader.batch_size, len(test_dataset))))
        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}")
            raise

        recommendations = pd.DataFrame({
            'music': user_candidates['music'].values[indices],
            'artist_name': user_candidates['artist_name'].values[indices],
            'genre': user_candidates['main_genre'].values[indices],
            'predicted_plays': predictions
        })

        # Convert (1,)-shaped prediction arrays to scalars BEFORE sorting.
        recommendations['predicted_plays'] = recommendations['predicted_plays'].apply(
            lambda x: float(np.ravel(x)[0])
        )

        # Fix: sort by predicted plays first, THEN drop duplicates — the old
        # order dropped duplicates before sorting, so keep='first' kept an
        # arbitrary occurrence rather than the highest-scored one.
        recommendations = recommendations.sort_values('predicted_plays', ascending=False)
        recommendations = recommendations.drop_duplicates(subset=['music'], keep='first')
        recommendations = recommendations.head(n_recommendations)

        # Debug predictions
        print(f"\nPrediction Statistics:")
        min_pred = recommendations['predicted_plays'].min()
        max_pred = recommendations['predicted_plays'].max()
        std_pred = recommendations['predicted_plays'].std()
        print(f"Prediction range: {min_pred:.2f} to {max_pred:.2f}")
        print(f"Prediction std: {std_pred:.2f}")

        # Print top recommendations with better formatting
        print("\nTop 10 Recommended Songs:")
        pd.set_option('display.max_columns', None)
        pd.set_option('display.width', None)
        print(recommendations.to_string(index=False,
                                        float_format=lambda x: '{:.2f}'.format(x)
                                        if isinstance(x, (float, np.float32, np.float64)) else str(x)))

        return recommendations.reset_index(drop=True)


# NOTE(review): this class intentionally-or-not SHADOWS the
# HybridMusicRecommender imported from train_model above; because
# _initialize_model resolves the name at call time, THIS definition is the
# one actually instantiated. Confirm the two definitions are kept in sync
# (or delete one of them).
class HybridMusicRecommender(nn.Module):
    """Hybrid recommender: id embeddings (user/music/artist/genre) combined
    with processed numerical and binary features, fed through an MLP with
    residual connections to predict a play count."""

    def __init__(self, num_users, num_music, num_artists, num_genres, num_numerical,
                 embedding_dim=64, layers=[256, 128, 64], dropout=0.2):
        super().__init__()

        # Embedding layers
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.music_embedding = nn.Embedding(num_music, embedding_dim)
        self.artist_embedding = nn.Embedding(num_artists, embedding_dim)
        self.genre_embedding = nn.Embedding(num_genres, embedding_dim)

        # Project numerical features into the same embedding_dim space.
        self.numerical_layer = nn.Sequential(
            nn.Linear(num_numerical, embedding_dim),
            nn.ReLU(),
            nn.BatchNorm1d(embedding_dim)
        )

        # Two binary features (explicit flag, gender) -> embedding_dim.
        self.binary_layer = nn.Sequential(
            nn.Linear(2, embedding_dim),
            nn.ReLU(),
            nn.BatchNorm1d(embedding_dim)
        )

        # 4 embeddings + numerical + binary, all of width embedding_dim.
        total_features = embedding_dim * 6

        # MLP layers with residual connections; the residual projection is
        # only needed when the layer changes width.
        self.fc_layers = nn.ModuleList()
        input_dim = total_features
        for layer_size in layers:
            self.fc_layers.append(nn.ModuleDict({
                'main': nn.Sequential(
                    nn.Linear(input_dim, layer_size),
                    nn.ReLU(),
                    nn.BatchNorm1d(layer_size),
                    nn.Dropout(dropout)
                ),
                'residual': nn.Linear(input_dim, layer_size) if input_dim != layer_size else None
            }))
            input_dim = layer_size

        self.final_layer = nn.Linear(layers[-1], 1)

    def forward(self, batch):
        """Predict play counts for a batch dict of encoded features.

        Expects integer id tensors ('user_id', 'music_id', 'artist_id',
        'genre_id'), a float 'numerical_features' tensor, and scalar-per-row
        'explicit'/'gender' tensors. Returns a (batch, 1) tensor.
        """
        user_emb = self.user_embedding(batch['user_id'])
        music_emb = self.music_embedding(batch['music_id'])
        artist_emb = self.artist_embedding(batch['artist_id'])
        genre_emb = self.genre_embedding(batch['genre_id'])

        numerical = self.numerical_layer(batch['numerical_features'])

        binary = torch.stack([batch['explicit'], batch['gender']], dim=1).float()
        binary = self.binary_layer(binary)

        x = torch.cat([
            user_emb, music_emb, artist_emb, genre_emb, numerical, binary
        ], dim=1)

        # Residual MLP stack.
        for layer in self.fc_layers:
            identity = x
            x = layer['main'](x)
            if layer['residual'] is not None:
                x = x + layer['residual'](identity)

        return self.final_layer(x)


def main():
    """Example usage: load checkpoint + encoders and print top-10 songs."""
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    # NOTE(review): model_path is relative to the CWD while the data paths are
    # anchored at BASE_DIR — confirm this asymmetry is intended.
    model_path = 'checkpoints/best_model.pth'
    catalog_data = pd.read_csv(os.path.join(BASE_DIR, 'data', 'test_data.csv'))
    encoders_path = os.path.join(BASE_DIR, 'data', 'data_encoders.pt')

    # Initialize recommendation generator
    recommender = RecommendationGenerator(model_path, catalog_data, encoders_path)

    # Example user
    user_info = {
        'age': 32,
        'gender': 'M',
        'genre': 'Pop',
        'music': 'Shape of You',
        'user_id': '44d39c6e5e7b45bfc2187fb3c89be58c5a3dc6a54d2a0075402c551c14ea1459'
    }

    # Generate recommendations
    recommendations = recommender.generate_recommendations(user_info, n_recommendations=10)
    print("\nTop 10 Recommended Songs:")
    print(recommendations.to_string(index=False))


if __name__ == "__main__":
    main()