Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -47,57 +47,34 @@ st.markdown(
|
|
47 |
|
48 |
|
49 |
#---------------------PDF OVERVIEW----------------------
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
pdf_files = [f for f in os.listdir(pdf_folder) if f.lower().endswith('.pdf')]
|
79 |
-
with col2:
|
80 |
-
with st.expander('PDF Documents'):
|
81 |
-
selected_pdfs = st.multiselect("Select PDFs", pdf_files)
|
82 |
-
def extract_text_from_pdf(uploaded_file, start_page, end_page):
|
83 |
-
text = extract_text(uploaded_file, page_numbers=range(start_page, end_page+1))
|
84 |
-
return text
|
85 |
-
|
86 |
-
pdf_texts = {}
|
87 |
-
|
88 |
-
for selected_pdf in selected_pdfs:
|
89 |
-
pdf_path = os.path.join(pdf_folder, selected_pdf)
|
90 |
-
uploaded_file = open(pdf_path, 'rb')
|
91 |
-
pdf_reader = PdfReader(uploaded_file)
|
92 |
-
total_pages = len(pdf_reader.pages)
|
93 |
-
|
94 |
-
# Extract text from the first 3 pages and the last 3 pages
|
95 |
-
extracted_text_first = extract_text_from_pdf(uploaded_file, 1, min(3, total_pages))
|
96 |
-
extracted_text_last = extract_text_from_pdf(uploaded_file, max(1, total_pages - 2), total_pages)
|
97 |
-
|
98 |
-
extracted_text = extracted_text_first + "\n" + extracted_text_last
|
99 |
-
|
100 |
-
pdf_texts[selected_pdf] = extracted_text
|
101 |
|
102 |
|
103 |
|
|
|
47 |
|
48 |
|
49 |
#---------------------PDF OVERVIEW----------------------
|
50 |
+
# Function to read PDF file
|
51 |
+
def read_pdf(file):
|
52 |
+
try:
|
53 |
+
pdf_reader = PdfFileReader(file)
|
54 |
+
num_pages = pdf_reader.numPages
|
55 |
+
pdf_text = ""
|
56 |
+
for page_number in range(num_pages):
|
57 |
+
page = pdf_reader.getPage(page_number)
|
58 |
+
pdf_text += page.extractText()
|
59 |
+
return pdf_text
|
60 |
+
except PdfReadError as e:
|
61 |
+
st.error(f"Error reading PDF: {e}")
|
62 |
+
return None
|
63 |
+
|
64 |
+
# Sidebar
|
65 |
+
st.sidebar.title("PDF Viewer")
|
66 |
+
|
67 |
+
# File uploader for PDFs
|
68 |
+
uploaded_files = st.sidebar.file_uploader("Upload PDF", type=["pdf"], accept_multiple_files=True)
|
69 |
+
|
70 |
+
# Display uploaded PDFs
|
71 |
+
if uploaded_files:
|
72 |
+
for uploaded_file in uploaded_files:
|
73 |
+
file_details = {"Filename": uploaded_file.name, "Filesize": uploaded_file.size}
|
74 |
+
st.sidebar.write(file_details)
|
75 |
+
pdf_text = read_pdf(uploaded_file)
|
76 |
+
if pdf_text:
|
77 |
+
st.write(pdf_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
|
80 |
|