# Spaces: DawnC / PawMatchAI -- Running on Zero
# File size: 15,569 Bytes

import torch
import numpy as np
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
from breed_health_info import breed_health_info
from breed_noise_info import breed_noise_info
from dog_database import dog_data
from scoring_calculation_system import UserPreferences
from sentence_transformers import SentenceTransformer, util

class SmartBreedMatcher:
    def __init__(self, dog_data: List[Tuple]):
        """Keep the breed records and load the sentence-embedding model.

        Args:
            dog_data: Breed records. The methods of this class read index 1
                as the breed name, 2 as size, 4 as temperament, 7 as exercise
                and 9 as the free-text description.
        """
        self.dog_data = dog_data
        # Model used to embed descriptions and temperaments for cosine similarity.
        embedding_model_name = 'paraphrase-MiniLM-L6-v2'
        self.model = SentenceTransformer(embedding_model_name)

    def _categorize_breeds(self) -> Dict:
        """自動將狗品種分類"""
        categories = {
            'working_dogs': [],
            'herding_dogs': [],
            'hunting_dogs': [],
            'companion_dogs': [],
            'guard_dogs': []
        }
        
        for breed_info in self.dog_data:
            description = breed_info[9].lower()
            temperament = breed_info[4].lower()
            
            # 根據描述和性格特徵自動分類
            if any(word in description for word in ['herding', 'shepherd', 'cattle', 'flock']):
                categories['herding_dogs'].append(breed_info[1])
            elif any(word in description for word in ['hunting', 'hunt', 'retriever', 'pointer']):
                categories['hunting_dogs'].append(breed_info[1])
            elif any(word in description for word in ['companion', 'toy', 'family', 'lap']):
                categories['companion_dogs'].append(breed_info[1])
            elif any(word in description for word in ['guard', 'protection', 'watchdog']):
                categories['guard_dogs'].append(breed_info[1])
            elif any(word in description for word in ['working', 'draft', 'cart']):
                categories['working_dogs'].append(breed_info[1])
                
        return categories

    def find_similar_breeds(self, breed_name: str, top_n: int = 5) -> List[Tuple[str, float]]:
        """找出與指定品種最相似的其他品種"""
        target_breed = next((breed for breed in self.dog_data if breed[1] == breed_name), None)
        if not target_breed:
            return []
            
        # 獲取目標品種的特徵
        target_features = {
            'breed_name': target_breed[1],  # 添加品種名稱
            'size': target_breed[2],
            'temperament': target_breed[4],
            'exercise': target_breed[7],
            'description': target_breed[9]
        }
        
        similarities = []
        for breed in self.dog_data:
            if breed[1] != breed_name:
                breed_features = {
                    'breed_name': breed[1],  # 添加品種名稱
                    'size': breed[2],
                    'temperament': breed[4],
                    'exercise': breed[7],
                    'description': breed[9]
                }
                
                similarity_score = self._calculate_breed_similarity(target_features, breed_features)
                similarities.append((breed[1], similarity_score))
        
        return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]


    def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
        """Compute a weighted similarity between two breeds.

        Combines six components:
          * cosine similarity of the description embeddings,
          * cosine similarity of the temperament embeddings,
          * exercise and size agreement (1.0 when equal, 0.5 otherwise),
          * health similarity (1 minus the absolute difference of the two
            ``_calculate_health_score`` values),
          * noise similarity from ``_calculate_noise_similarity``.

        Args:
            breed1_features: Dict with keys ``breed_name``, ``size``,
                ``temperament``, ``exercise`` and ``description``.
            breed2_features: Same keys, for the breed being compared.

        Returns:
            The weighted sum of the six components (weights sum to 1.0).
        """
        # Embeddings are memoized per instance, keyed by text. find_similar_breeds
        # passes the same target breed on every call, so without the memo its
        # description and temperament would be re-encoded once per candidate.
        cache = self.__dict__.setdefault('_embedding_cache', {})

        def embed(text):
            if not isinstance(text, str):
                # Non-string input cannot be used as a cache key; encode directly.
                return self.model.encode(text)
            if text not in cache:
                cache[text] = self.model.encode(text)
            return cache[text]

        # Description text similarity
        desc1_embedding = embed(breed1_features['description'])
        desc2_embedding = embed(breed2_features['description'])
        description_similarity = float(util.pytorch_cos_sim(desc1_embedding, desc2_embedding))

        # Basic categorical features: full credit on an exact match, half otherwise
        size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
        exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5

        # Temperament text similarity
        temp1_embedding = embed(breed1_features['temperament'])
        temp2_embedding = embed(breed2_features['temperament'])
        temperament_similarity = float(util.pytorch_cos_sim(temp1_embedding, temp2_embedding))

        # Health score similarity
        health_score1 = self._calculate_health_score(breed1_features['breed_name'])
        health_score2 = self._calculate_health_score(breed2_features['breed_name'])
        health_similarity = 1.0 - abs(health_score1 - health_score2)

        # Noise level similarity
        noise_similarity = self._calculate_noise_similarity(
            breed1_features['breed_name'],
            breed2_features['breed_name']
        )

        # Weighted combination
        weights = {
            'description': 0.25,
            'temperament': 0.20,
            'exercise': 0.15,
            'size': 0.10,
            'health': 0.15,
            'noise': 0.15
        }

        final_similarity = (
            description_similarity * weights['description'] +
            temperament_similarity * weights['temperament'] +
            exercise_similarity * weights['exercise'] +
            size_similarity * weights['size'] +
            health_similarity * weights['health'] +
            noise_similarity * weights['noise']
        )

        return final_similarity


    def _calculate_final_scores(self, breed_name: str, base_scores: Dict, 
                              smart_score: float, is_preferred: bool, 
                              similarity_score: float = 0.0) -> Dict:
        """
        計算最終分數，包含基礎分數和獎勵分數
        
        Args:
            breed_name: 品種名稱
            base_scores: 基礎評分 (空間、運動等)
            smart_score: 智能匹配分數
            is_preferred: 是否為用戶指定品種
            similarity_score: 與指定品種的相似度 (0-1)
        """
        # 基礎權重
        weights = {
            'base': 0.6,      # 基礎分數權重
            'smart': 0.25,    # 智能匹配權重
            'bonus': 0.15     # 獎勵分數權重
        }
        
        # 計算基礎分數
        base_score = base_scores.get('overall', 0.7)
        
        # 計算獎勵分數
        bonus_score = 0.0
        if is_preferred:
            # 用戶指定品種獲得最高獎勵
            bonus_score = 0.95
        elif similarity_score > 0:
            # 相似品種獲得部分獎勵，但不超過80%的最高獎勵
            bonus_score = min(0.8, similarity_score) * 0.95
        
        # 計算最終分數
        final_score = (
            base_score * weights['base'] +
            smart_score * weights['smart'] +
            bonus_score * weights['bonus']
        )
        
        # 更新各項分數
        scores = base_scores.copy()
        
        # 如果是用戶指定品種，稍微提升各項基礎分數，但保持合理範圍
        if is_preferred:
            for key in scores:
                if key != 'overall':
                    scores[key] = min(1.0, scores[key] * 1.1)  # 最多提升10%
        
        # 為相似品種調整分數
        elif similarity_score > 0:
            boost_factor = 1.0 + (similarity_score * 0.05)  # 最多提升5%
            for key in scores:
                if key != 'overall':
                    scores[key] = min(0.95, scores[key] * boost_factor)  # 確保不超過95%
        
        return {
            'final_score': round(final_score, 4),
            'base_score': round(base_score, 4),
            'bonus_score': round(bonus_score, 4),
            'scores': {k: round(v, 4) for k, v in scores.items()}
        }

    def _calculate_health_score(self, breed_name: str) -> float:
        """Score a breed's health from keywords in its health notes.

        Starts at 1.0, subtracts 0.1 per severe-condition keyword and 0.05
        per moderate-condition keyword found in the notes, applies optional
        user-preference multipliers, and clamps the result to [0.3, 1.0].

        Args:
            breed_name: Key into ``breed_health_info``.

        Returns:
            The health score, or 0.5 when the breed has no entry or the entry
            has no textual ``health_notes``.
        """
        import re  # local import: only needed for the whole-word 'ear' match

        if breed_name not in breed_health_info:
            return 0.5

        health_notes = breed_health_info[breed_name].get('health_notes')
        if not isinstance(health_notes, str):
            # An entry without notes carries no information: treat it like an
            # unknown breed instead of raising.
            return 0.5
        health_notes = health_notes.lower()

        # Severe health problems
        severe_conditions = [
            'cancer', 'cardiomyopathy', 'epilepsy', 'dysplasia',
            'bloat', 'progressive', 'syndrome'
        ]

        # Moderate health problems (substring match, so e.g. 'thyroid' also
        # covers 'hypothyroidism')
        moderate_conditions = [
            'allergies', 'infections', 'thyroid', 'luxation',
            'skin problems'
        ]

        severe_count = sum(1 for condition in severe_conditions if condition in health_notes)
        moderate_count = sum(1 for condition in moderate_conditions if condition in health_notes)

        # 'ear' is matched as a whole word: as a bare substring it also hits
        # 'heart', 'year', 'early', ... and would count a condition that the
        # notes never mention.
        if re.search(r'\bears?\b', health_notes):
            moderate_count += 1

        health_score = 1.0
        health_score -= (severe_count * 0.1)
        health_score -= (moderate_count * 0.05)

        # Optional adjustments driven by user preferences, when the instance
        # has been given a ``user_preferences`` attribute.
        preferences = getattr(self, 'user_preferences', None)
        if preferences is not None:
            if getattr(preferences, 'has_children', False):
                if 'requires frequent' in health_notes or 'regular monitoring' in health_notes:
                    health_score *= 0.9

            if getattr(preferences, 'health_sensitivity', None) == 'high':
                health_score *= 0.9

        return max(0.3, min(1.0, health_score))

    

    def _calculate_noise_similarity(self, breed1: str, breed2: str) -> float:
        """Compare the noise levels of two breeds.

        Noise levels are looked up in ``breed_noise_info`` and mapped to
        1 (Low), 2 (Moderate) or 3 (High); a missing breed, a missing
        ``noise_level`` or an unrecognised label counts as 2.

        Returns:
            1.0 for equal levels, 0.5 for adjacent levels, 0.0 for Low vs High.
        """
        def level_of(breed):
            label = breed_noise_info.get(breed, {}).get('noise_level', 'Unknown')
            # Unknown or unrecognised labels default to the middle level.
            return {'Low': 1, 'Moderate': 2, 'High': 3, 'Unknown': 2}.get(label, 2)

        gap = abs(level_of(breed1) - level_of(breed2))
        # The largest possible gap is 2, so dividing by 2 normalises to [0, 1].
        return 1.0 - (gap / 2)

    def _general_matching(self, description: str, top_n: int = 10) -> List[Dict]:
        """基本的品種匹配邏輯，考慮描述、性格、噪音和健康因素"""
        matches = []
        for breed in self.dog_data:
            breed_name = breed[1]
            breed_description = breed[9]
            temperament = breed[4]
            
            # 計算描述文本和性格的相似度
            desc_embedding = self.model.encode(description)
            breed_desc_embedding = self.model.encode(breed_description)
            breed_temp_embedding = self.model.encode(temperament)
            
            desc_similarity = float(util.pytorch_cos_sim(desc_embedding, breed_desc_embedding))
            temp_similarity = float(util.pytorch_cos_sim(desc_embedding, breed_temp_embedding))
            
            # 計算噪音相似度和健康分數
            noise_similarity = self._calculate_noise_similarity(breed_name, breed_name)
            health_score = self._calculate_health_score(breed_name)
            health_similarity = 1.0 - abs(health_score - 0.8)  # 假設理想健康分數為 0.8
            
            # 加權計算分數
            weights = {
                'description': 0.35,
                'temperament': 0.25,
                'noise': 0.2,
                'health': 0.2
            }
    
            # 計算最終分數
            final_score = (
                desc_similarity * weights['description'] +
                temp_similarity * weights['temperament'] +
                noise_similarity * weights['noise'] +
                health_similarity * weights['health']
            )
            
            matches.append({
                'breed': breed_name,
                'score': final_score,
                'is_preferred': False,
                'similarity': final_score,
                'reason': "Matched based on description, temperament, noise level, and health score"
            })
        
        # 排序並返回前 N 個匹配結果
        return sorted(matches, key=lambda x: -x['score'])[:top_n]


    def match_user_preference(self, description: str, top_n: int = 10) -> List[Dict]:
        """根據用戶描述匹配最適合的品種"""
        preferred_breed = self._detect_breed_preference(description)
        
        matches = []
        if preferred_breed:
            similar_breeds = self.find_similar_breeds(preferred_breed, top_n=top_n)
            
            # 首先添加偏好品種
            breed_info = next((breed for breed in self.dog_data if breed[1] == preferred_breed), None)
            if breed_info:
                health_score = self._calculate_health_score(preferred_breed)
                noise_info = breed_noise_info.get(preferred_breed, {
                    "noise_level": "Unknown",
                    "noise_notes": "No noise information available"
                })
                
                # 偏好品種必定是最高分
                matches.append({
                    'breed': preferred_breed,
                    'score': 1.0,
                    'is_preferred': True,
                    'similarity': 1.0,
                    'health_score': health_score,
                    'noise_level': noise_info['noise_level'],
                    'reason': "Directly matched your preferred breed"
                })
            
            # 添加相似品種
            for breed_name, similarity in similar_breeds:
                if breed_name != preferred_breed:
                    health_score = self._calculate_health_score(breed_name)
                    noise_info = breed_noise_info.get(breed_name, {
                        "noise_level": "Unknown",
                        "noise_notes": "No noise information available"
                    })
                    
                    # 調整相似品種分數計算
                    base_similarity = similarity * 0.6
                    health_factor = health_score * 0.2
                    noise_factor = self._calculate_noise_similarity(preferred_breed, breed_name) * 0.2
                    
                    # 確保相似品種分數不會超過偏好品種
                    final_score = min(0.95, base_similarity + health_factor + noise_factor)
                    
                    matches.append({
                        'breed': breed_name,
                        'score': final_score,
                        'is_preferred': False,
                        'similarity': similarity,
                        'health_score': health_score,
                        'noise_level': noise_info['noise_level'],
                        'reason': f"Similar to {preferred_breed} in characteristics, health profile, and noise level"
                    })
        else:
            matches = self._general_matching(description, top_n)
        
        return sorted(matches, 
                    key=lambda x: (-int(x.get('is_preferred', False)), 
                                -x['score'], # 降序排列
                                x['breed']))[:top_n]

    def _detect_breed_preference(self, description: str) -> Optional[str]:
        """檢測用戶是否提到特定品種"""
        description_lower = description.lower()
        
        for breed_info in self.dog_data:
            breed_name = breed_info[1]
            normalized_breed = breed_name.lower().replace('_', ' ')
            
            if any(phrase in description_lower for phrase in [
                f"love {normalized_breed}",
                f"like {normalized_breed}",
                f"prefer {normalized_breed}",
                f"want {normalized_breed}",
                normalized_breed
            ]):
                return breed_name
                
        return None