Shankarm08 committed on
Commit
2ceb5b6
1 Parent(s): b80a1ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -4,22 +4,22 @@ from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
4
  import pandas as pd
5
  import pdfplumber
6
 
7
- # Initialize RAG components
8
  tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
9
- retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True) # Correct usage of dataset
10
  model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
11
 
12
- # Extract text from PDF
13
  def extract_text_from_pdf(pdf_file):
 
14
  with pdfplumber.open(pdf_file) as pdf:
15
- text = ""
16
  for page in pdf.pages:
17
  page_text = page.extract_text()
18
  if page_text:
19
  text += page_text + "\n"
20
  return text.strip()
21
 
22
- # Streamlit UI
23
  st.title("RAG-Powered PDF & CSV Chatbot")
24
 
25
  # CSV file upload
 
4
  import pandas as pd
5
  import pdfplumber
6
 
7
+ # Load the RAG model and tokenizer
8
  tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
9
+ retriever = RagRetriever.from_pretrained("facebook/wiki_dpr", use_dummy_dataset=True)
10
  model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)
11
 
12
+ # Function to extract text from a PDF file
13
  def extract_text_from_pdf(pdf_file):
14
+ text = ""
15
  with pdfplumber.open(pdf_file) as pdf:
 
16
  for page in pdf.pages:
17
  page_text = page.extract_text()
18
  if page_text:
19
  text += page_text + "\n"
20
  return text.strip()
21
 
22
+ # Streamlit app
23
  st.title("RAG-Powered PDF & CSV Chatbot")
24
 
25
  # CSV file upload