DawnC committed on
Commit
336a3c7
1 Parent(s): ee184bc

Delete smart_breed_matcher.py

Browse files
Files changed (1) hide show
  1. smart_breed_matcher.py +0 -376
smart_breed_matcher.py DELETED
@@ -1,376 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from typing import List, Dict, Tuple, Optional
4
- from dataclasses import dataclass
5
- from breed_health_info import breed_health_info
6
- from breed_noise_info import breed_noise_info
7
- from dog_database import dog_data
8
- from scoring_calculation_system import UserPreferences
9
- from sentence_transformers import SentenceTransformer, util
10
-
11
class SmartBreedMatcher:
    """Rank dog breeds against a free-text user description.

    Each entry of ``dog_data`` is read positionally: index 1 is the breed
    name, 2 the size, 4 the temperament text, 7 the exercise needs and 9 the
    long description. Text similarity is computed with a sentence-embedding
    model; health and noise information come from the module-level
    ``breed_health_info`` and ``breed_noise_info`` lookups.
    """

    # Ordered (category, keywords) pairs: the first category whose keyword
    # appears in a breed description wins, so the order is significant.
    _CATEGORY_KEYWORDS = (
        ('herding_dogs', ('herding', 'shepherd', 'cattle', 'flock')),
        ('hunting_dogs', ('hunting', 'hunt', 'retriever', 'pointer')),
        ('companion_dogs', ('companion', 'toy', 'family', 'lap')),
        ('guard_dogs', ('guard', 'protection', 'watchdog')),
        ('working_dogs', ('working', 'draft', 'cart')),
    )

    # Numeric scale for noise labels; unknown labels count as moderate.
    _NOISE_LEVELS = {'Low': 1, 'Moderate': 2, 'High': 3, 'Unknown': 2}

    # Substrings of the health notes that lower the health score.
    _SEVERE_CONDITIONS = (
        'cancer', 'cardiomyopathy', 'epilepsy', 'dysplasia',
        'bloat', 'progressive', 'syndrome',
    )
    _MODERATE_CONDITIONS = (
        'allergies', 'infections', 'thyroid', 'luxation',
        'skin problems', 'ear',
    )

    def __init__(self, dog_data: List[Tuple]):
        """Store the breed table and load the sentence-embedding model."""
        self.dog_data = dog_data
        self.model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        # text -> embedding, so each breed text is encoded only once.
        self._embedding_cache = {}

    def _embed(self, text: str):
        """Return the model embedding of ``text``, cached per instance.

        Only breed texts go through this cache (a bounded set); free-form
        user descriptions are encoded directly by the callers.
        """
        # setdefault keeps this working for instances built without __init__.
        cache = vars(self).setdefault('_embedding_cache', {})
        if text not in cache:
            cache[text] = self.model.encode(text)
        return cache[text]

    @staticmethod
    def _breed_features(breed: Tuple) -> Dict:
        """Extract the fields used for breed-to-breed comparison."""
        return {
            'breed_name': breed[1],
            'size': breed[2],
            'temperament': breed[4],
            'exercise': breed[7],
            'description': breed[9],
        }

    @staticmethod
    def _noise_level(breed_name: str) -> str:
        """Return the recorded noise label for a breed, or ``'Unknown'``."""
        return breed_noise_info.get(breed_name, {}).get('noise_level', 'Unknown')

    def _categorize_breeds(self) -> Dict:
        """Group breed names into coarse categories by description keywords.

        Returns:
            A dict with the keys ``working_dogs``, ``herding_dogs``,
            ``hunting_dogs``, ``companion_dogs`` and ``guard_dogs``, each
            mapping to a list of breed names. A breed lands in at most one
            category; breeds matching no keyword are omitted.
        """
        categories = {
            'working_dogs': [],
            'herding_dogs': [],
            'hunting_dogs': [],
            'companion_dogs': [],
            'guard_dogs': [],
        }

        for breed_info in self.dog_data:
            # Tolerate a missing description instead of crashing on .lower().
            description = (breed_info[9] or '').lower()
            for category, keywords in self._CATEGORY_KEYWORDS:
                if any(word in description for word in keywords):
                    categories[category].append(breed_info[1])
                    break

        return categories

    def find_similar_breeds(self, breed_name: str, top_n: int = 5) -> List[Tuple[str, float]]:
        """Return the ``top_n`` breeds most similar to ``breed_name``.

        Args:
            breed_name: Exact breed name as stored at index 1 of a data row.
            top_n: Maximum number of results.

        Returns:
            ``(breed_name, similarity)`` pairs sorted by descending
            similarity, excluding the target breed itself. An empty list if
            the target breed is not in the data.
        """
        target_breed = next(
            (breed for breed in self.dog_data if breed[1] == breed_name), None
        )
        if not target_breed:
            return []

        target_features = self._breed_features(target_breed)
        similarities = [
            (
                breed[1],
                self._calculate_breed_similarity(
                    target_features, self._breed_features(breed)
                ),
            )
            for breed in self.dog_data
            if breed[1] != breed_name
        ]

        return sorted(similarities, key=lambda item: item[1], reverse=True)[:top_n]

    def _calculate_breed_similarity(self, breed1_features: Dict, breed2_features: Dict) -> float:
        """Compute a weighted similarity between two breeds.

        Combines embedding similarity of the descriptions and temperaments
        with exact-match checks on size and exercise, plus health-score and
        noise-level closeness.

        Args:
            breed1_features: Dict with ``breed_name``, ``size``,
                ``temperament``, ``exercise`` and ``description``.
            breed2_features: Same layout for the second breed.

        Returns:
            The weighted sum of the six component similarities.
        """
        # Embeddings are cached: the target breed is compared against every
        # other breed, so re-encoding it each time would be wasted work.
        description_similarity = float(util.pytorch_cos_sim(
            self._embed(breed1_features['description']),
            self._embed(breed2_features['description']),
        ))
        temperament_similarity = float(util.pytorch_cos_sim(
            self._embed(breed1_features['temperament']),
            self._embed(breed2_features['temperament']),
        ))

        # Categorical traits: full credit on exact match, half otherwise.
        size_similarity = 1.0 if breed1_features['size'] == breed2_features['size'] else 0.5
        exercise_similarity = 1.0 if breed1_features['exercise'] == breed2_features['exercise'] else 0.5

        health_score1 = self._calculate_health_score(breed1_features['breed_name'])
        health_score2 = self._calculate_health_score(breed2_features['breed_name'])
        health_similarity = 1.0 - abs(health_score1 - health_score2)

        noise_similarity = self._calculate_noise_similarity(
            breed1_features['breed_name'],
            breed2_features['breed_name'],
        )

        weights = {
            'description': 0.25,
            'temperament': 0.20,
            'exercise': 0.15,
            'size': 0.10,
            'health': 0.15,
            'noise': 0.15,
        }

        final_similarity = (
            description_similarity * weights['description'] +
            temperament_similarity * weights['temperament'] +
            exercise_similarity * weights['exercise'] +
            size_similarity * weights['size'] +
            health_similarity * weights['health'] +
            noise_similarity * weights['noise']
        )

        return final_similarity

    def _calculate_final_scores(self, breed_name: str, base_scores: Dict,
                                smart_score: float, is_preferred: bool,
                                similarity_score: float = 0.0) -> Dict:
        """Blend base, smart-match and bonus scores into a final score.

        Args:
            breed_name: Breed name (not used in the computation).
            base_scores: Per-dimension base scores; ``'overall'`` is used as
                the base score and defaults to 0.7 when absent.
            smart_score: Smart-matching score.
            is_preferred: Whether this is the breed the user asked for.
            similarity_score: Similarity to the requested breed (0-1).

        Returns:
            A dict with ``final_score``, ``base_score``, ``bonus_score`` and
            ``scores`` (the adjusted copy of ``base_scores``), all rounded to
            four decimals. ``base_scores`` itself is not modified.
        """
        weights = {
            'base': 0.6,
            'smart': 0.25,
            'bonus': 0.15,
        }

        base_score = base_scores.get('overall', 0.7)

        bonus_score = 0.0
        if is_preferred:
            # The explicitly requested breed gets the top bonus.
            bonus_score = 0.95
        elif similarity_score > 0:
            # Similar breeds get a partial bonus, capped at 80% of the top.
            bonus_score = min(0.8, similarity_score) * 0.95

        final_score = (
            base_score * weights['base'] +
            smart_score * weights['smart'] +
            bonus_score * weights['bonus']
        )

        scores = base_scores.copy()

        if is_preferred:
            # Lift each dimension by up to 10%, never above 1.0.
            for key in scores:
                if key != 'overall':
                    scores[key] = min(1.0, scores[key] * 1.1)
        elif similarity_score > 0:
            # Lift each dimension by up to 5%, capped at 0.95.
            boost_factor = 1.0 + (similarity_score * 0.05)
            for key in scores:
                if key != 'overall':
                    scores[key] = min(0.95, scores[key] * boost_factor)

        return {
            'final_score': round(final_score, 4),
            'base_score': round(base_score, 4),
            'bonus_score': round(bonus_score, 4),
            'scores': {k: round(v, 4) for k, v in scores.items()},
        }

    def _calculate_health_score(self, breed_name: str) -> float:
        """Score a breed's health from keywords in its health notes.

        Starts at 1.0 and subtracts 0.1 per severe and 0.05 per moderate
        condition keyword found. If the instance has a ``user_preferences``
        attribute, the score is further scaled by it.

        Returns:
            A value clamped to [0.3, 1.0]; 0.5 when the breed has no health
            entry or the entry carries no ``health_notes``.
        """
        if breed_name not in breed_health_info:
            return 0.5

        # NOTE(review): assumes each entry is a dict, as breed_noise_info
        # entries are treated elsewhere in this class — confirm.
        raw_notes = breed_health_info[breed_name].get('health_notes')
        if raw_notes is None:
            # No notes recorded: treat like an unknown breed, don't crash.
            return 0.5
        health_notes = raw_notes.lower()

        severe_count = sum(1 for c in self._SEVERE_CONDITIONS if c in health_notes)
        moderate_count = sum(1 for c in self._MODERATE_CONDITIONS if c in health_notes)

        health_score = 1.0
        health_score -= (severe_count * 0.1)
        health_score -= (moderate_count * 0.05)

        # Optional adjustment; user_preferences is not set by __init__, so
        # it only applies when a caller attached it to the instance.
        user_preferences = getattr(self, 'user_preferences', None)
        if user_preferences is not None:
            if user_preferences.has_children:
                if 'requires frequent' in health_notes or 'regular monitoring' in health_notes:
                    health_score *= 0.9

            if user_preferences.health_sensitivity == 'high':
                health_score *= 0.9

        return max(0.3, min(1.0, health_score))

    def _calculate_noise_similarity(self, breed1: str, breed2: str) -> float:
        """Return how close two breeds' noise levels are.

        Returns:
            1.0 for identical levels, 0.5 for adjacent levels and 0.0 for
            Low versus High. Unknown breeds or labels count as Moderate.
        """
        level1 = self._NOISE_LEVELS.get(self._noise_level(breed1), 2)
        level2 = self._NOISE_LEVELS.get(self._noise_level(breed2), 2)

        # The largest possible gap is 2 (Low vs High), hence the divisor.
        return 1.0 - (abs(level1 - level2) / 2)

    def _general_matching(self, description: str, top_n: int = 10) -> List[Dict]:
        """Rank all breeds against a description when no breed was named.

        Args:
            description: Free-text user description.
            top_n: Maximum number of results.

        Returns:
            Match dicts (``breed``, ``score``, ``is_preferred``,
            ``similarity``, ``reason``) sorted by descending score.
        """
        weights = {
            'description': 0.35,
            'temperament': 0.25,
            'noise': 0.2,
            'health': 0.2,
        }

        # The user description does not change per breed: encode it once.
        desc_embedding = self.model.encode(description)

        # NOTE(review): the original compared each breed's noise level with
        # itself, which always yields 1.0. There is no target noise level
        # here, so the term stays a constant that does not affect ranking.
        noise_similarity = 1.0

        matches = []
        for breed in self.dog_data:
            breed_name = breed[1]

            desc_similarity = float(
                util.pytorch_cos_sim(desc_embedding, self._embed(breed[9]))
            )
            temp_similarity = float(
                util.pytorch_cos_sim(desc_embedding, self._embed(breed[4]))
            )

            health_score = self._calculate_health_score(breed_name)
            # Closeness to an assumed ideal health score of 0.8.
            health_similarity = 1.0 - abs(health_score - 0.8)

            final_score = (
                desc_similarity * weights['description'] +
                temp_similarity * weights['temperament'] +
                noise_similarity * weights['noise'] +
                health_similarity * weights['health']
            )

            matches.append({
                'breed': breed_name,
                'score': final_score,
                'is_preferred': False,
                'similarity': final_score,
                'reason': "Matched based on description, temperament, noise level, and health score"
            })

        return sorted(matches, key=lambda m: -m['score'])[:top_n]

    def match_user_preference(self, description: str, top_n: int = 10) -> List[Dict]:
        """Return the breeds that best fit the user's description.

        If the description names a known breed, that breed is returned first
        with score 1.0, followed by its most similar breeds. Otherwise all
        breeds are ranked by general matching.

        Args:
            description: Free-text user description.
            top_n: Maximum number of results.

        Returns:
            Match dicts ordered by preferred-first, then descending score,
            then breed name.
        """
        preferred_breed = self._detect_breed_preference(description)

        if not preferred_breed:
            matches = self._general_matching(description, top_n)
        else:
            matches = []
            similar_breeds = self.find_similar_breeds(preferred_breed, top_n=top_n)

            if any(breed[1] == preferred_breed for breed in self.dog_data):
                # The requested breed always carries the top score.
                matches.append({
                    'breed': preferred_breed,
                    'score': 1.0,
                    'is_preferred': True,
                    'similarity': 1.0,
                    'health_score': self._calculate_health_score(preferred_breed),
                    'noise_level': self._noise_level(preferred_breed),
                    'reason': "Directly matched your preferred breed"
                })

            for breed_name, similarity in similar_breeds:
                if breed_name == preferred_breed:
                    continue

                health_score = self._calculate_health_score(breed_name)
                base_similarity = similarity * 0.6
                health_factor = health_score * 0.2
                noise_factor = self._calculate_noise_similarity(preferred_breed, breed_name) * 0.2

                # Cap below 1.0 so no similar breed outranks the requested one.
                final_score = min(0.95, base_similarity + health_factor + noise_factor)

                matches.append({
                    'breed': breed_name,
                    'score': final_score,
                    'is_preferred': False,
                    'similarity': similarity,
                    'health_score': health_score,
                    'noise_level': self._noise_level(breed_name),
                    'reason': f"Similar to {preferred_breed} in characteristics, health profile, and noise level"
                })

        return sorted(
            matches,
            key=lambda m: (-int(m.get('is_preferred', False)), -m['score'], m['breed']),
        )[:top_n]

    def _detect_breed_preference(self, description: str) -> Optional[str]:
        """Detect whether the description mentions a specific breed.

        Breed names are compared lower-cased with underscores turned into
        spaces. A name must appear as a whole word (a trailing plural ``s``
        or ``es`` is accepted), and when several breeds match the longest
        name wins, so "french bulldog" is not shadowed by "bulldog".

        Returns:
            The breed name as stored in ``dog_data``, or ``None``.
        """
        import re  # local import: the module header does not import re

        description_lower = description.lower()
        best_match = None
        best_length = 0

        for breed_info in self.dog_data:
            breed_name = breed_info[1]
            normalized_breed = breed_name.lower().replace('_', ' ').strip()

            # Skip empty names (they would match anything) and names that
            # cannot beat the current best; ties keep the earlier breed.
            if not normalized_breed or len(normalized_breed) <= best_length:
                continue

            pattern = r'(?<!\w)' + re.escape(normalized_breed) + r'(?:e?s)?(?!\w)'
            if re.search(pattern, description_lower):
                best_match = breed_name
                best_length = len(normalized_breed)

        return best_match