nirajandhakal
committed on
Commit
•
9eb9505
1
Parent(s):
ebe6c12
Update model.py
Browse files
model.py
CHANGED
@@ -2,15 +2,19 @@ import pandas as pd
|
|
2 |
import numpy as np
|
3 |
from sklearn.model_selection import train_test_split
|
4 |
from sklearn.preprocessing import LabelEncoder
|
5 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
from tensorflow.keras.models import Model
|
8 |
from tensorflow.keras.layers import Input, Embedding, Flatten, concatenate, Dense
|
9 |
from tensorflow.keras.optimizers import Adam
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Load datasets
|
12 |
-
books = pd.read_csv("../data/
|
13 |
-
ratings = pd.read_csv("../data/
|
14 |
|
15 |
# Preprocess data
|
16 |
user_encoder = LabelEncoder()
|
@@ -19,32 +23,25 @@ book_encoder = LabelEncoder()
|
|
19 |
ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
|
20 |
ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
-
|
25 |
|
26 |
# Define the neural network model
|
27 |
def build_model(num_users, num_books, embedding_size=50):
|
28 |
"""
|
29 |
Build a recommendation model.
|
30 |
-
|
31 |
Args:
|
32 |
num_users (int): The number of users in the dataset.
|
33 |
num_books (int): The number of books in the dataset.
|
34 |
embedding_size (int, optional): The size of the embedding vectors. Defaults to 50.
|
35 |
-
|
36 |
Returns:
|
37 |
keras.Model: The compiled recommendation model.
|
38 |
"""
|
39 |
user_input = Input(shape=(1,))
|
40 |
book_input = Input(shape=(1,))
|
41 |
|
42 |
-
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(
|
43 |
-
|
44 |
-
)
|
45 |
-
book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size)(
|
46 |
-
book_input
|
47 |
-
)
|
48 |
|
49 |
user_flat = Flatten()(user_embedding)
|
50 |
book_flat = Flatten()(book_embedding)
|
@@ -58,32 +55,33 @@ def build_model(num_users, num_books, embedding_size=50):
|
|
58 |
|
59 |
return model
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
num_users=len(ratings["user_id"].unique()),
|
65 |
-
num_books=len(ratings["book_id"].unique()),
|
66 |
-
)
|
67 |
-
history = model.fit(
|
68 |
-
[train["user_id"], train["book_id"]],
|
69 |
-
train["rating"],
|
70 |
-
epochs=5,
|
71 |
-
batch_size=128,
|
72 |
-
validation_split=0.1,
|
73 |
-
)
|
74 |
-
|
75 |
-
# Plot training and validation loss
|
76 |
-
plt.figure(figsize=(12, 6))
|
77 |
-
plt.plot(history.history["loss"], label="Training Loss")
|
78 |
-
plt.plot(history.history["val_loss"], label="Validation Loss")
|
79 |
-
plt.xlabel("Epoch")
|
80 |
-
plt.ylabel("Loss")
|
81 |
-
plt.legend()
|
82 |
-
plt.show()
|
83 |
-
|
84 |
-
# Save the model
|
85 |
-
model.save("recommendation_model.h5")
|
86 |
-
|
87 |
-
# Evaluate the model
|
88 |
-
test_loss = model.evaluate([test["user_id"], test["book_id"]], test["rating"])
|
89 |
-
print(f"Test Loss: {test_loss}")
|
|
|
2 |
import numpy as np
|
3 |
from sklearn.model_selection import train_test_split
|
4 |
from sklearn.preprocessing import LabelEncoder
|
|
|
|
|
5 |
from tensorflow.keras.models import Model
|
6 |
from tensorflow.keras.layers import Input, Embedding, Flatten, concatenate, Dense
|
7 |
from tensorflow.keras.optimizers import Adam
|
8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
+
import tensorflow as tf
|
10 |
+
|
11 |
+
# Check if GPU is available
|
12 |
+
gpu_available = tf.config.list_physical_devices('GPU')
|
13 |
+
print(gpu_available)
|
14 |
|
15 |
# Load datasets
|
16 |
+
books = pd.read_csv("../data/datasets/books.csv")
|
17 |
+
ratings = pd.read_csv("../data/datasets/ratings.csv")
|
18 |
|
19 |
# Preprocess data
|
20 |
user_encoder = LabelEncoder()
|
|
|
23 |
ratings["user_id"] = user_encoder.fit_transform(ratings["user_id"])
|
24 |
ratings["book_id"] = book_encoder.fit_transform(ratings["book_id"])
|
25 |
|
26 |
+
# Ensure all book IDs are included
|
27 |
+
all_books = np.arange(len(books))
|
|
|
28 |
|
29 |
# Define the neural network model
|
30 |
def build_model(num_users, num_books, embedding_size=50):
|
31 |
"""
|
32 |
Build a recommendation model.
|
|
|
33 |
Args:
|
34 |
num_users (int): The number of users in the dataset.
|
35 |
num_books (int): The number of books in the dataset.
|
36 |
embedding_size (int, optional): The size of the embedding vectors. Defaults to 50.
|
|
|
37 |
Returns:
|
38 |
keras.Model: The compiled recommendation model.
|
39 |
"""
|
40 |
user_input = Input(shape=(1,))
|
41 |
book_input = Input(shape=(1,))
|
42 |
|
43 |
+
user_embedding = Embedding(input_dim=num_users, output_dim=embedding_size)(user_input)
|
44 |
+
book_embedding = Embedding(input_dim=num_books, output_dim=embedding_size)(book_input)
|
|
|
|
|
|
|
|
|
45 |
|
46 |
user_flat = Flatten()(user_embedding)
|
47 |
book_flat = Flatten()(book_embedding)
|
|
|
55 |
|
56 |
return model
|
57 |
|
58 |
+
# Train the collaborative filtering model
|
59 |
+
with tf.device('/GPU:0') if gpu_available else tf.device('/CPU:0'):
|
60 |
+
model_cf = build_model(num_users=len(ratings["user_id"].unique()),
|
61 |
+
num_books=len(books))
|
62 |
+
model_cf.summary() # Display model summary
|
63 |
+
history = model_cf.fit([ratings["user_id"], ratings["book_id"]],
|
64 |
+
ratings["rating"],
|
65 |
+
epochs=5,
|
66 |
+
batch_size=128,
|
67 |
+
validation_split=0.1)
|
68 |
+
|
69 |
+
# Save the collaborative filtering model
|
70 |
+
model_cf.save("recommendation_model.keras")
|
71 |
+
|
72 |
+
# Evaluate the collaborative filtering model
|
73 |
+
test_loss = model_cf.evaluate([ratings["user_id"], ratings["book_id"]], ratings["rating"])
|
74 |
+
print(f"Collaborative Filtering Test Loss: {test_loss}")
|
75 |
+
|
76 |
+
# Test the recommendation functions
|
77 |
+
user_id = 0 # Example user ID
|
78 |
+
book_name = "The Great Gatsby" # Example book name
|
79 |
+
|
80 |
+
print("Content-Based Recommendation:")
|
81 |
+
print(content_based_recommendation(book_name, books))
|
82 |
+
|
83 |
+
print("\nModel-Recommended History-Based Recommendation:")
|
84 |
+
print(history_based_recommendation(user_id, model_cf, ratings))
|
85 |
|
86 |
+
print("\nHybrid Recommendation:")
|
87 |
+
print(hybrid_recommendation(user_id, book_name, model_cf, books, ratings))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|