destiratnakomala committed on
Commit
4d4f8e6
·
verified ·
1 Parent(s): 89ebe02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -51
app.py CHANGED
@@ -47,57 +47,34 @@ st.markdown(
47
 
48
 
49
  #---------------------PDF OVERVIEW----------------------
50
- def get_pdf_details(folder_path):
51
- pdf_details = []
52
- for filename in os.listdir(folder_path):
53
- if filename.lower().endswith('.pdf'):
54
- pdf_path = os.path.join(folder_path, filename)
55
- try:
56
- with open(pdf_path, "rb") as file:
57
- pdf_reader = PdfReader(file)
58
- page_count = len(pdf_reader.pages)
59
- pdf_details.append({"Berkas Putusan": filename, "Jumlah Halaman": page_count})
60
- except Exception as e:
61
- st.warning(f"Could not read {filename}: {str(e)}")
62
- return pdf_details
63
-
64
- pdf_list = get_pdf_details(pdf_folder)
65
- pdf_df = pd.DataFrame(pdf_list)
66
-
67
- col1, col2= st.columns(2)
68
- with col1:
69
- if not pdf_df.empty:
70
- with st.expander('PDF Overview'):
71
- st.dataframe(pdf_df)
72
- else:
73
- st.warning("No PDFs found in the specified folder.")
74
-
75
-
76
- #---------------------MULTISELECT AND TEXT EXTRACTION----------------------
77
-
78
- pdf_files = [f for f in os.listdir(pdf_folder) if f.lower().endswith('.pdf')]
79
- with col2:
80
- with st.expander('PDF Documents'):
81
- selected_pdfs = st.multiselect("Select PDFs", pdf_files)
82
- def extract_text_from_pdf(uploaded_file, start_page, end_page):
83
- text = extract_text(uploaded_file, page_numbers=range(start_page, end_page+1))
84
- return text
85
-
86
- pdf_texts = {}
87
-
88
- for selected_pdf in selected_pdfs:
89
- pdf_path = os.path.join(pdf_folder, selected_pdf)
90
- uploaded_file = open(pdf_path, 'rb')
91
- pdf_reader = PdfReader(uploaded_file)
92
- total_pages = len(pdf_reader.pages)
93
-
94
- # Extract text from the first 3 pages and the last 3 pages
95
- extracted_text_first = extract_text_from_pdf(uploaded_file, 1, min(3, total_pages))
96
- extracted_text_last = extract_text_from_pdf(uploaded_file, max(1, total_pages - 2), total_pages)
97
-
98
- extracted_text = extracted_text_first + "\n" + extracted_text_last
99
-
100
- pdf_texts[selected_pdf] = extracted_text
101
 
102
 
103
 
 
47
 
48
 
49
  #---------------------PDF OVERVIEW----------------------
50
+ # Function to read PDF file
51
+ def read_pdf(file):
52
+ try:
53
+ pdf_reader = PdfFileReader(file)
54
+ num_pages = pdf_reader.numPages
55
+ pdf_text = ""
56
+ for page_number in range(num_pages):
57
+ page = pdf_reader.getPage(page_number)
58
+ pdf_text += page.extractText()
59
+ return pdf_text
60
+ except PdfReadError as e:
61
+ st.error(f"Error reading PDF: {e}")
62
+ return None
63
+
64
+ # Sidebar
65
+ st.sidebar.title("PDF Viewer")
66
+
67
+ # File uploader for PDFs
68
+ uploaded_files = st.sidebar.file_uploader("Upload PDF", type=["pdf"], accept_multiple_files=True)
69
+
70
+ # Display uploaded PDFs
71
+ if uploaded_files:
72
+ for uploaded_file in uploaded_files:
73
+ file_details = {"Filename": uploaded_file.name, "Filesize": uploaded_file.size}
74
+ st.sidebar.write(file_details)
75
+ pdf_text = read_pdf(uploaded_file)
76
+ if pdf_text:
77
+ st.write(pdf_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80