ZeeAI1 committed on
Commit
8090f7b
1 Parent(s): f283421

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import streamlit as st
4
+ from io import BytesIO
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ from transformers import pipeline
10
+ import torch
11
+
12
# Set the browser tab title, page icon and centered layout for the app.
st.set_page_config(
    page_title="RAG-based PDF Chat",
    layout="centered",
    page_icon="📄",
)
13
+
14
@st.cache_resource
def load_summarization_pipeline():
    """Create the ``facebook/bart-large-cnn`` summarization pipeline.

    Returns:
        The pipeline object, or ``None`` if it could not be created (the
        failure is reported through ``st.error``).
    """
    try:
        # Device 0 selects the first CUDA GPU; -1 is passed when CUDA is
        # unavailable.
        if torch.cuda.is_available():
            device_index = 0
        else:
            device_index = -1
        return pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=device_index,
        )
    except Exception as exc:
        st.error(f"Failed to load the summarization model: {exc}")
        return None
22
+
23
# Module-level summarizer; None when load_summarization_pipeline() failed.
summarizer = load_summarization_pipeline()

# Maps a folder label to the list of PDF URLs that belong to it.
PDF_FOLDERS = {
    "Folder 1": [
        "https://huggingface.co/username/repo/resolve/main/file1.pdf",
    ],
}
28
+
29
def fetch_pdf_text_from_folders(pdf_folders, timeout=30):
    """Download every PDF listed in *pdf_folders* and return its text.

    Args:
        pdf_folders: Mapping of folder name to an iterable of PDF URLs.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a single unresponsive server would block the
            app indefinitely.

    Returns:
        One string in which each folder contributes a
        ``"\\n[Folder: <name>]\\n"`` header followed by the extracted text
        of all of its pages, in iteration order. Pages with no extractable
        text are skipped.

    A failure while downloading or parsing one URL is reported through
    ``st.error`` and does not stop the remaining URLs from being processed;
    pages extracted before the failure are kept.
    """
    # Collect pieces in a list and join once instead of repeated string +=.
    parts = []
    for folder_name, urls in pdf_folders.items():
        parts.append(f"\n[Folder: {folder_name}]\n")
        for url in urls:
            try:
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()
                pdf_reader = PdfReader(BytesIO(response.content))
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        parts.append(page_text)
            except Exception as e:
                # Best effort: surface the problem and continue with the
                # next URL.
                st.error(f"Error fetching PDF from {url}: {e}")
    return "".join(parts)
47
+
48
@st.cache_data
def get_text_chunks(text):
    """Split *text* into chunks with ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=2000`` and
    ``chunk_overlap=200``; the result of its ``split_text`` is returned.
    """
    splitter_options = {"chunk_size": 2000, "chunk_overlap": 200}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
52
+
53
@st.cache_resource
def load_embedding_function():
    """Create the ``all-MiniLM-L6-v2`` sentence-transformers embedder.

    Returns:
        A ``HuggingFaceEmbeddings`` instance, or ``None`` if construction
        raised (the failure is reported through ``st.error``).
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    try:
        embedder = HuggingFaceEmbeddings(model_name=model_id)
    except Exception as exc:
        st.error(f"Failed to load embedding model: {exc}")
        return None
    return embedder
60
+
61
# Module-level embedder used when building the vector store; None when
# load_embedding_function() failed.
embedding_function = load_embedding_function()
62
+
63
@st.cache_resource
def load_or_create_vector_store(text_chunks):
    """Build a FAISS vector store from *text_chunks*.

    Uses the module-level ``embedding_function`` to embed the chunks.

    Returns:
        The FAISS store, or ``None`` when *text_chunks* is empty or the
        store could not be built (both cases are reported via ``st.error``).
    """
    if text_chunks:
        try:
            store = FAISS.from_texts(text_chunks, embedding=embedding_function)
        except Exception as exc:
            st.error(f"Failed to create or load vector store: {exc}")
            store = None
        return store

    st.error("No valid text chunks found.")
    return None
73
+
74
def generate_summary_with_huggingface(query, retrieved_text):
    """Summarize *retrieved_text* in the context of *query*.

    Args:
        query: The user's question; placed at the start of the model input.
        retrieved_text: Text retrieved for the query; appended after a
            "Related information:" heading.

    Returns:
        The generated summary text, or the string
        ``"Error generating summary."`` when the model is unavailable or
        the call fails (the failure is reported through ``st.error``).
    """
    # The module-level summarizer is None when the model failed to load.
    # Report that directly instead of letting the call below fail with an
    # opaque "'NoneType' object is not callable".
    if summarizer is None:
        st.error("Failed to generate summary: the summarization model is not loaded.")
        return "Error generating summary."

    # The combined prompt is cut to its first 1024 characters before being
    # handed to the model.
    summarization_input = f"{query}\n\nRelated information:\n{retrieved_text}"[:1024]
    try:
        summary = summarizer(summarization_input, max_length=500, min_length=50, do_sample=False)
        return summary[0]["summary_text"]
    except Exception as e:
        st.error(f"Failed to generate summary: {e}")
        return "Error generating summary."
82
+
83
+ def user_input(user_question, vector_store):
84
+ if vector_store i
85
+