File size: 814 Bytes
f861dee
 
 
ed26242
f861dee
 
 
 
 
ed26242
 
 
 
 
 
f861dee
ed26242
 
 
f861dee
ed26242
 
f861dee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import faiss
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.datasets import fetch_20newsgroups

class DocumentRetriever:
    """Keyword-based retriever over an in-memory list of text documents.

    ``documents`` starts empty; call ``load_documents`` (or assign the
    attribute directly) before calling ``retrieve``.
    """

    def __init__(self):
        # Raw document texts; stays empty until load_documents() is called.
        self.documents = []

    def load_documents(self):
        """Load the 20 Newsgroups dataset into ``self.documents``.

        Calls ``fetch_20newsgroups(subset='all')`` and stores its ``data``
        attribute. Prints a warning if the resulting collection is empty.
        """
        newsgroups_data = fetch_20newsgroups(subset='all')
        self.documents = newsgroups_data.data
        if not self.documents:
            print("No documents loaded!")

    def retrieve(self, query: str) -> list:
        """Return every document that contains ``query``, ignoring case.

        Args:
            query: Text to look for. It is matched as a plain substring,
                so an empty string matches every document.

        Returns:
            The matching documents in their stored order. If no documents
            have been loaded, a single-element list holding a
            "not initialized" message is returned instead.
        """
        if not self.documents:
            return ["Document retrieval is not initialized."]
        # Simple keyword match (can replace with advanced semantic similarity later).
        # Lowercase the query once up front rather than once per document.
        needle = query.lower()
        return [doc for doc in self.documents if needle in doc.lower()]