Spaces:
Sleeping
Sleeping
Shankarm08
committed on
Commit
•
2ceb5b6
1
Parent(s):
b80a1ef
Update app.py
Browse files
app.py
CHANGED
@@ -4,22 +4,22 @@ from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
|
|
4 |
import pandas as pd
|
5 |
import pdfplumber
|
6 |
|
7 |
-
#
|
8 |
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
|
9 |
-
retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True)
|
10 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
|
11 |
|
12 |
-
#
|
13 |
def extract_text_from_pdf(pdf_file):
|
|
|
14 |
with pdfplumber.open(pdf_file) as pdf:
|
15 |
-
text = ""
|
16 |
for page in pdf.pages:
|
17 |
page_text = page.extract_text()
|
18 |
if page_text:
|
19 |
text += page_text + "\n"
|
20 |
return text.strip()
|
21 |
|
22 |
-
# Streamlit
|
23 |
st.title("RAG-Powered PDF & CSV Chatbot")
|
24 |
|
25 |
# CSV file upload
|
|
|
4 |
import pandas as pd
|
5 |
import pdfplumber
|
6 |
|
7 |
+
# Load the RAG model and tokenizer
|
8 |
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
|
9 |
+
retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True)
|
10 |
model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
|
11 |
|
12 |
+
# Function to extract text from a PDF file
|
13 |
def extract_text_from_pdf(pdf_file):
|
14 |
+
text = ""
|
15 |
with pdfplumber.open(pdf_file) as pdf:
|
|
|
16 |
for page in pdf.pages:
|
17 |
page_text = page.extract_text()
|
18 |
if page_text:
|
19 |
text += page_text + "\n"
|
20 |
return text.strip()
|
21 |
|
22 |
+
# Streamlit app
|
23 |
st.title("RAG-Powered PDF & CSV Chatbot")
|
24 |
|
25 |
# CSV file upload
|