Spaces:
Sleeping
Sleeping
File size: 814 Bytes
f861dee ed26242 f861dee ed26242 f861dee ed26242 f861dee ed26242 f861dee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.datasets import fetch_20newsgroups
class DocumentRetriever:
    """Keyword-based retriever over an in-memory list of text documents.

    Documents are held in ``self.documents`` (empty until
    ``load_documents`` is called or the attribute is assigned directly).
    """

    def __init__(self) -> None:
        # Corpus searched by ``retrieve``; starts empty.
        self.documents = []

    def load_documents(self) -> None:
        """Load the 20 Newsgroups dataset (``subset='all'``) into ``self.documents``.

        Prints a warning to stdout if the loaded corpus is empty.
        NOTE(review): scikit-learn may download the dataset on first use;
        confirm network access in the deployment environment.
        """
        newsgroups_data = fetch_20newsgroups(subset='all')
        self.documents = newsgroups_data.data
        if not self.documents:
            print("No documents loaded!")

    def retrieve(self, query: str) -> list:
        """Return every document containing ``query`` as a substring.

        Matching is case-insensitive (both sides are lower-cased) and
        results keep the order of ``self.documents``.

        Args:
            query: Text to search for. An empty string is a substring of
                every document, so it matches the whole corpus.

        Returns:
            The list of matching documents (possibly empty). If no
            documents are loaded, a single-element list holding the
            message ``"Document retrieval is not initialized."``.
        """
        if not self.documents:
            return ["Document retrieval is not initialized."]

        # Simple keyword match (can replace with advanced semantic similarity later).
        # Lower-case the query once instead of once per document.
        needle = query.lower()
        return [doc for doc in self.documents if needle in doc.lower()]
|