Spaces:

chagu13
/

chagu-demo

Running

chagu-demo / rag_sec /document_retriver.py

talexm

news-data retrieval

ed26242 13 days ago

814 Bytes

	import faiss
	from sklearn.feature_extraction.text import TfidfVectorizer
	import numpy as np
	from sklearn.datasets import fetch_20newsgroups

	class DocumentRetriever:
	    """Keyword-based retriever over an in-memory list of text documents.

	    The document list starts empty and is filled by ``load_documents``.
	    ``retrieve`` then returns every document that contains the query as a
	    case-insensitive substring.
	    """

	    def __init__(self):
	        # Raw document texts; stays empty until load_documents() is called.
	        self.documents = []

	    def load_documents(self, subset="all"):
	        """Load the 20 Newsgroups dataset into ``self.documents``.

	        Args:
	            subset: Which split to request from ``fetch_20newsgroups``.
	                Defaults to ``"all"``, the value that was previously
	                hard-coded here.
	        """
	        newsgroups_data = fetch_20newsgroups(subset=subset)
	        self.documents = newsgroups_data.data
	        if not self.documents:
	            print("No documents loaded!")

	    def retrieve(self, query):
	        """Return the documents that contain ``query``, ignoring case.

	        Args:
	            query: Text to look for; compared after lower-casing both the
	                query and each document.

	        Returns:
	            A list of matching documents in their stored order (possibly
	            empty). If no documents are loaded, a one-element list holding
	            the message ``"Document retrieval is not initialized."`` is
	            returned instead. An empty query matches every document,
	            because the empty string is a substring of any string.
	        """
	        if not self.documents:
	            return ["Document retrieval is not initialized."]
	        # Simple keyword match (can replace with advanced semantic similarity later).
	        # Lower-case the query once instead of once per document.
	        needle = query.lower()
	        return [doc for doc in self.documents if needle in doc.lower()]