Update handler.py
Browse files- handler.py +29 -0
handler.py
CHANGED
@@ -4,6 +4,35 @@ import numpy as np
|
|
4 |
import pandas as pd
|
5 |
import os
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
class EndpointHandler:
|
8 |
def __init__(self, path=""):
|
9 |
model_path = os.path.join(path, "model.pkl")
|
|
|
4 |
import pandas as pd
|
5 |
import os
|
6 |
|
7 |
+
class ContentBasedRecommender:
    """Recommend books by pooling the ``similar_books`` of a user's known books.

    The recommender reads three columns from ``train_data``: ``user_id``,
    ``book_id`` and ``similar_books``.  Each ``similar_books`` cell is
    presumably an iterable of book ids (it is fed to ``set.union``) --
    TODO confirm against the code that builds the training frame.
    """

    def __init__(self, train_data):
        """Store the training interactions used by ``predict``."""
        self.train_data = train_data

    def predict(self, user_id, k=10):
        """Return up to ``k`` books similar to the ones ``user_id`` already has.

        Args:
            user_id: Value matched against the ``user_id`` column.
            k: Number of recommendations to sample.

        Returns:
            A NumPy array of ``k`` candidates sampled without replacement when
            at least ``k`` candidates exist; otherwise the plain list of all
            candidates (possibly empty).  Callers must accept both types.
        """
        interactions = self.train_data
        user_books = set(
            interactions.loc[interactions['user_id'] == user_id, 'book_id']
        )

        # One pass over the frame instead of one full scan per owned book.
        # drop_duplicates keeps the first row per book, matching the previous
        # ``.iloc[0]`` lookup.
        owned_rows = interactions[
            interactions['book_id'].isin(list(user_books))
        ].drop_duplicates('book_id')
        similar_books = set().union(*owned_rows['similar_books'])

        # Never recommend something the user already has.
        recommended_books = list(similar_books - user_books)

        if len(recommended_books) >= k:
            return np.random.choice(recommended_books, size=k, replace=False)
        return recommended_books

    def evaluate(self, test_data, k=10, max_users=100):
        """Compute mean hit rate and mean NDCG@k over users in ``test_data``.

        Args:
            test_data: Frame with ``user_id`` and ``book_id`` columns holding
                the held-out interactions.
            k: Cut-off passed to ``predict`` and used in the metric denominators.
            max_users: Evaluate only the first ``max_users`` unique users
                (default 100); ``None`` evaluates all of them.

        Returns:
            Tuple ``(mean_hit_rate, mean_ndcg)``.  Both are NaN when no user
            is evaluated, because ``np.mean`` of an empty list is NaN.
        """
        # Local import: the file's visible imports do not include ``math``.
        import math

        user_ids = test_data['user_id'].unique()
        if max_users is not None:
            user_ids = user_ids[:max_users]

        hit_rate, ndcg_scores = [], []

        for user_id in user_ids:
            true_books = set(
                test_data.loc[test_data['user_id'] == user_id, 'book_id']
            )
            # Keep the predictions as an ordered list: DCG depends on rank,
            # and iterating a set would assign arbitrary ranks.
            ranked_books = list(self.predict(user_id, k))

            cutoff = min(k, len(true_books))
            if cutoff <= 0:
                # Nothing can be hit (k <= 0 or no held-out books); score the
                # user as zero instead of dividing by zero.
                hit_rate.append(0.0)
                ndcg_scores.append(0.0)
                continue

            hits = len(true_books & set(ranked_books))
            hit_rate.append(hits / cutoff)

            dcg = sum(
                1 / math.log2(rank + 2)
                for rank, book in enumerate(ranked_books)
                if book in true_books
            )
            idcg = sum(1 / math.log2(i + 2) for i in range(cutoff))
            ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

        return np.mean(hit_rate), np.mean(ndcg_scores)
|
35 |
+
|
36 |
class EndpointHandler:
|
37 |
def __init__(self, path=""):
|
38 |
model_path = os.path.join(path, "model.pkl")
|