import functools
import pickle as pkl

import numpy as np
import pandas as pd
from datasets import load_dataset
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer

# Only this many leading embeddings are scored against the query.
# NOTE(review): looks like a deliberate cap to keep the per-query loop cheap;
# confirm before raising it.
_MAX_CANDIDATES = 1000


@functools.lru_cache(maxsize=1)
def _load_model():
    """Build the sentence-embedding model once and reuse it across calls."""
    return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')


@functools.lru_cache(maxsize=1)
def _load_articles():
    """Load the 'train' split of the articles dataset once as a DataFrame."""
    return load_dataset('Mohamed-BC/Articles')['train'].to_pandas()


@functools.lru_cache(maxsize=1)
def _load_embeddings():
    """Read the pickled article embeddings from disk once."""
    # SECURITY: unpickling can execute arbitrary code. This path must only
    # ever point at a trusted, locally produced file.
    with open('data/articles_embeddings.pkl', 'rb') as fh:
        return pkl.load(fh)


def recommend(query, n=5):
    """Return the titles of the ``n`` articles most similar to ``query``.

    The query is encoded with the sentence-embedding model and compared, by
    cosine similarity, against the first ``_MAX_CANDIDATES`` precomputed
    article embeddings. Titles are returned best match first.

    Args:
        query: Text to encode with the sentence-embedding model.
        n: Maximum number of titles to return. Values ``<= 0`` yield an empty
            result (a plain ``[-n:]`` slice would return every candidate for
            ``n == 0``).

    Returns:
        The ``'title'`` column of the selected dataset rows, ordered from
        most to least similar.
    """
    articles = _load_articles()
    if n <= 0:
        return articles.iloc[:0]['title']

    q_embedding = _load_model().encode(query)
    candidates = _load_embeddings()[:_MAX_CANDIDATES]

    # scipy's ``cosine`` is a distance; ``1 - distance`` is the similarity.
    cos_sim = np.array([1 - cosine(q_embedding, emb) for emb in candidates])

    # argsort is ascending: reverse it so the best match comes first, then
    # keep the first n positions.
    top_n = np.argsort(cos_sim)[::-1][:n]

    # Assumes embedding i belongs to dataset row i -- TODO confirm against
    # the script that produced the pickle.
    return articles.iloc[top_n]['title']
|