File size: 965 Bytes
0d3ec20
6245545
 
0d3ec20
3fcefd6
 
6245545
3fcefd6
6245545
 
 
 
 
 
3fcefd6
6245545
 
3fcefd6
6245545
 
3fcefd6
6245545
3fcefd6
 
6245545
 
5b8f3bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline

_summarizer = None  # Summarization pipeline, built lazily and reused across calls.


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization")
    return _summarizer


def _read_pdf_text(path):
    """Return the text of every page of the PDF at *path*, newline-terminated."""
    with open(path, "rb") as pdf_file:
        pages = PdfReader(pdf_file).pages
        # Guard against a page yielding no text (None/empty) so the
        # concatenation never fails on image-only pages.
        return "".join((page.extract_text() or "") + "\n" for page in pages)


def summarize_and_extract_text(files):
    """Extract the text of each uploaded PDF and summarize it.

    Args:
        files: The uploaded PDFs. Each item is either an object exposing the
            file path as ``.name`` or the path itself. A single item (not
            wrapped in a list) and ``None``/empty input are also accepted.

    Returns:
        A ``(summaries, extracted_texts)`` tuple of two lists with one entry
        per input file, in input order. A PDF with no extractable text gets
        an empty-string summary.
    """
    if not files:
        # Nothing uploaded: return early without loading the model.
        return [], []
    if not isinstance(files, (list, tuple)):
        files = [files]

    summaries = []
    extracted_texts = []
    for file in files:
        # Accept both file-like wrappers (path in .name) and bare path strings.
        path = getattr(file, "name", file)
        text = _read_pdf_text(path)
        extracted_texts.append(text)

        if text.strip():
            # truncation=True keeps documents longer than the model's input
            # limit from failing; only the leading part is summarized.
            result = _get_summarizer()(
                text,
                max_length=50,
                min_length=10,
                do_sample=False,
                truncation=True,
            )
            summary = result[0]["summary_text"]
        else:
            # No extractable text, so there is nothing to summarize.
            summary = ""
        summaries.append(summary)
    return summaries, extracted_texts

# Web UI: a multi-file upload button feeding summarize_and_extract_text, with
# one textbox for the summaries and one for the raw extracted text.
iface = gr.Interface(
    fn=summarize_and_extract_text,
    # File-extension filters need the leading dot (".pdf"); a bare "pdf" is
    # neither a Gradio type category nor an extension.
    inputs=gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"], file_count="multiple"),
    outputs=[gr.Textbox(label="Summarized Text"), gr.Textbox(label="Extracted Text")],
    title="PDF Summarizer & Extracted Text"
)

# Start the Gradio server when this script is executed.
iface.launch()