clfegg committed on
Commit
2b8cf41
·
verified ·
1 Parent(s): 97be00a

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +29 -0
handler.py CHANGED
@@ -4,6 +4,35 @@ import numpy as np
4
  import pandas as pd
5
  import os
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  class EndpointHandler:
8
  def __init__(self, path=""):
9
  model_path = os.path.join(path, "model.pkl")
 
4
  import pandas as pd
5
  import os
6
 
7
class ContentBasedRecommender:
    """Recommend books by pooling the ``similar_books`` of a user's books.

    ``train_data`` is accessed through the columns ``user_id``, ``book_id``
    and ``similar_books``. Each ``similar_books`` cell is merged into a set,
    so it is presumably an iterable of book ids -- TODO confirm against the
    code that builds the frame.
    """

    def __init__(self, train_data):
        """Store the training frame used for every later lookup."""
        self.train_data = train_data

    def predict(self, user_id, k=10):
        """Return up to ``k`` books similar to those ``user_id`` already has.

        Args:
            user_id: Value matched against the ``user_id`` column.
            k: Maximum number of recommendations.

        Returns:
            A NumPy array of ``k`` candidates sampled without replacement
            when at least ``k`` candidates exist; otherwise a plain list with
            every candidate (possibly empty). Books the user already has are
            never returned. The order carries no ranking information.
        """
        train = self.train_data
        user_books = set(train.loc[train['user_id'] == user_id, 'book_id'])

        # Select the rows of all owned books in one pass instead of filtering
        # the whole frame once per book; only the first row seen for each
        # book contributes its similar_books.
        owned_rows = train[train['book_id'].isin(user_books)]
        owned_rows = owned_rows.drop_duplicates(subset='book_id')

        similar_books = set()
        for neighbours in owned_rows['similar_books']:
            similar_books.update(neighbours)

        recommended_books = list(similar_books - user_books)
        if len(recommended_books) >= k:
            return np.random.choice(recommended_books, size=k, replace=False)
        return recommended_books

    def evaluate(self, test_data, k=10, max_users=100):
        """Compute mean hit rate and mean NDCG@k over users in ``test_data``.

        Args:
            test_data: Frame with ``user_id`` and ``book_id`` columns holding
                the held-out interactions.
            k: Number of recommendations requested per user.
            max_users: Evaluate only the first ``max_users`` distinct users
                (in order of appearance); ``None`` evaluates all of them.

        Returns:
            ``(mean_hit_rate, mean_ndcg)``. Both are ``0.0`` when there is no
            user to evaluate.
        """
        user_ids = test_data['user_id'].unique()[:max_users]
        hit_rate, ndcg_scores = [], []

        for user_id in user_ids:
            true_books = set(test_data[test_data['user_id'] == user_id]['book_id'])
            # Keep predictions as an ordered, de-duplicated list: ranks taken
            # from a set would follow hash order, not prediction order.
            pred_books = list(dict.fromkeys(self.predict(user_id, k)))

            best_possible = min(k, len(true_books))
            hits = len(true_books.intersection(pred_books))
            # best_possible is 0 for k == 0 or a user id that matches no rows.
            hit_rate.append(hits / best_possible if best_possible > 0 else 0.0)

            dcg = sum(
                1 / np.log2(rank + 2)
                for rank, book in enumerate(pred_books)
                if book in true_books
            )
            idcg = sum(1 / np.log2(i + 2) for i in range(best_possible))
            ndcg_scores.append(dcg / idcg if idcg > 0 else 0)

        if not hit_rate:
            # np.mean([]) would emit a RuntimeWarning and return NaN.
            return 0.0, 0.0
        return np.mean(hit_rate), np.mean(ndcg_scores)
35
+
36
  class EndpointHandler:
37
  def __init__(self, path=""):
38
  model_path = os.path.join(path, "model.pkl")