Spaces:

Narayana02
/

IntelliLearn

Sleeping

App Files Files Community

IntelliLearn / utils.py

Narayana02

Rename utilities.py to utils.py

6f169ed verified 16 days ago

raw

history blame contribute delete

2.38 kB

	import PyPDF2
	import json
	import networkx as nx
	from sentence_transformers import SentenceTransformer, util
	import openai

	# Sentence-embedding model shared by this module. It is instantiated once at
	# import time; hybrid_retrieval() uses it to encode the query and the sections.
	model = SentenceTransformer("all-MiniLM-L6-v2")

	# 1. Extract Text from PDF
	def extract_text_from_pdf(file_path):
	    """Return the text of every page of a PDF as a single string.

	    Args:
	        file_path: Path of the PDF file to read.

	    Returns:
	        The extracted text of all pages, in page order, with a newline
	        between consecutive pages. A page that yields no text contributes
	        an empty string.
	    """
	    with open(file_path, "rb") as f:
	        reader = PyPDF2.PdfReader(f)
	        # A newline between pages keeps the last line of one page from being
	        # glued to the first line of the next (which would hide a heading
	        # from any line-based parsing downstream). `or ""` guards against a
	        # page whose extraction yields nothing. The join runs inside the
	        # `with` block because pages are read lazily from the open file.
	        return "\n".join(page.extract_text() or "" for page in reader.pages)

	# 2. Build Hierarchical Tree
	def build_hierarchical_tree(text, textbook_title):
	    """Group the lines of ``text`` into chapters and return them as a dict.

	    A line whose stripped form starts with "Chapter" opens a new chapter.
	    Every other non-blank line is stored, stripped, as a section of the
	    most recently opened chapter. Lines that appear before the first
	    chapter heading are ignored.

	    Returns:
	        {"title": textbook_title, "chapters": [{"title": ..., "sections": [...]}, ...]}
	    """
	    chapters = []
	    active = None
	    for stripped in map(str.strip, text.split("\n")):
	        if not stripped:
	            # Blank lines never start a chapter and are never stored.
	            continue
	        if stripped.startswith("Chapter"):
	            active = {"title": stripped, "sections": []}
	            chapters.append(active)
	            continue
	        if active is not None:
	            active["sections"].append(stripped)
	    return {"title": textbook_title, "chapters": chapters}

	def save_tree(tree, path):
	    """Write ``tree`` to ``path`` as JSON indented by 4 spaces.

	    The file at ``path`` is opened in text write mode, so any existing
	    content is replaced.
	    """
	    with open(path, mode="w") as out_file:
	        json.dump(tree, fp=out_file, indent=4)

	# 3. Hybrid Retrieval
	def hybrid_retrieval(
	    query,
	    openai_api_key,
	    *,
	    tree_path="hierarchical_trees/example_tree.json",
	    top_k=3,
	):
	    """Return the stored sections most similar to ``query``, joined by spaces.

	    The tree JSON is loaded from ``tree_path``, all chapter sections are
	    flattened into one list, and the query and sections are embedded with
	    the module-level ``model``. Sections are ranked by cosine similarity to
	    the query.

	    Args:
	        query: The search text.
	        openai_api_key: Accepted for interface compatibility; this function
	            does not use it.
	        tree_path: JSON file holding a dict with a "chapters" list whose
	            items each have a "sections" list.
	        top_k: Maximum number of sections to return.

	    Returns:
	        The best-matching sections, most similar first, joined with single
	        spaces. An empty string if the tree has no sections or ``top_k`` is
	        not positive.
	    """
	    with open(tree_path) as f:
	        tree = json.load(f)
	    all_sections = [
	        section for chapter in tree["chapters"] for section in chapter["sections"]
	    ]
	    # Nothing to rank: return early instead of encoding an empty list.
	    if not all_sections or top_k <= 0:
	        return ""
	    query_embedding = model.encode(query, convert_to_tensor=True)
	    section_embeddings = model.encode(all_sections, convert_to_tensor=True)
	    similarities = util.pytorch_cos_sim(query_embedding, section_embeddings)
	    # topk() raises when k exceeds the number of candidates, so clamp it.
	    k = min(top_k, len(all_sections))
	    top_indices = similarities[0].topk(k).indices.tolist()
	    return " ".join(all_sections[i] for i in top_indices)

	# 4. RAG Answer Generation
	def rag_answer(query, context, openai_api_key):
	    """Answer ``query`` from ``context`` using the OpenAI completions endpoint.

	    Args:
	        query: The question to answer.
	        context: Retrieved text the answer should be based on.
	        openai_api_key: API key used to authenticate the request.

	    Returns:
	        The text of the first completion choice, stripped of surrounding
	        whitespace. At most 150 tokens are requested.
	    """
	    prompt = f"Answer the question based on the context below:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:"
	    # text-davinci-003 has been retired by OpenAI; gpt-3.5-turbo-instruct is
	    # the documented replacement on the completions endpoint.
	    completion_model = "gpt-3.5-turbo-instruct"
	    if hasattr(openai, "OpenAI"):
	        # openai>=1.0 removed the module-level Completion resource; requests
	        # go through a client object instead.
	        client = openai.OpenAI(api_key=openai_api_key)
	        response = client.completions.create(
	            model=completion_model,
	            prompt=prompt,
	            max_tokens=150,
	        )
	    else:
	        # Legacy (pre-1.0) SDK: key is set globally on the module.
	        openai.api_key = openai_api_key
	        response = openai.Completion.create(
	            model=completion_model,
	            prompt=prompt,
	            max_tokens=150,
	        )
	    return response.choices[0].text.strip()