kahennefer committed on
Commit
b2e2224
1 Parent(s): 11336ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -4
app.py CHANGED
@@ -1,7 +1,105 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ #libraries
3
+ from transformers import pipeline # Import the pipeline function from transformers
4
+ from PyPDF2 import PdfReader
5
+ import torch
6
+ from bark import SAMPLE_RATE, generate_audio, preload_models
7
+ from scipy.io.wavfile import write as write_wav
8
+ from IPython.display import Audio
9
  import gradio as gr
10
 
 
 
11
 
12
# Hugging Face model used for both summarization passes.
_SUMMARIZER_MODEL = "pszemraj/led-base-book-summary"

# Generation settings shared by both summarization passes; only max_length
# differs between them.
_SUMMARY_GENERATION_KWARGS = {
    "min_length": 16,
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 3,
    "repetition_penalty": 3.5,
    "num_beams": 4,
    "early_stopping": True,
}

# Holds the lazily created pipeline so the model is loaded once per process
# instead of once per request.
_summarizer_cache = {}


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    if "summarizer" not in _summarizer_cache:
        _summarizer_cache["summarizer"] = pipeline(
            "summarization",
            _SUMMARIZER_MODEL,
            device=0 if torch.cuda.is_available() else -1,
        )
    return _summarizer_cache["summarizer"]


def _extract_abstract(page_text):
    """Return the lines between an "Abstract" heading and "1 ... Introduction"."""
    abstract_lines = []
    inside_abstract = False
    for raw_line in page_text.splitlines():
        normalized = raw_line.strip().lower()
        if normalized == "abstract":
            # Skip the heading itself rather than stripping the word
            # "Abstract" from the collected text afterwards.
            inside_abstract = True
            continue
        if "1" in normalized and "introduction" in normalized:
            inside_abstract = False
            continue
        if inside_abstract and normalized:
            abstract_lines.append(raw_line.strip())
    # Join with spaces so the last word of one line does not fuse with the
    # first word of the next.
    return " ".join(abstract_lines)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    The abstract is the text between a line consisting solely of "Abstract"
    (case-insensitive) and a line containing both "1" and "introduction".
    It is summarized twice: once down to at most 150 tokens, then again down
    to at most 25 tokens.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The ``summary_text`` string of the second summarization pass.

    Raises:
        ValueError: If the PDF has no pages or no abstract text was found on
            its first page.
    """
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError(f"PDF has no pages: {pdf_file_path}")
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract(first_page_text)
    if not abstract_text:
        # Summarizing an empty string only yields meaningless output, so
        # fail with an explicit message instead.
        raise ValueError(
            f"No abstract section found on the first page of: {pdf_file_path}"
        )

    summarizer = _get_summarizer()

    # First pass: condense the abstract.
    first_pass = summarizer(
        abstract_text,
        max_length=150,
        **_SUMMARY_GENERATION_KWARGS,
    )
    # Second pass: summarize the summary to get a much shorter text.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **_SUMMARY_GENERATION_KWARGS,
    )

    return second_pass[0]["summary_text"]
66
+
67
def generate_audio(pdf_file_path):
    """Summarize a PDF's abstract and synthesize the summary to a WAV file.

    Args:
        pdf_file_path: Path of the PDF file whose abstract should be spoken.

    Returns:
        Path of a temporary ``.wav`` file containing the generated speech.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    from tempfile import NamedTemporaryFile

    # This function shadows the module-level ``generate_audio`` imported from
    # bark, so calling that name here would recurse into this function.
    # Import Bark's synthesizer under a distinct local name instead.
    from bark import generate_audio as bark_generate_audio

    # ``preload_models`` takes boolean flags, not a model name; request the
    # small variants of the text, coarse and fine models explicitly.
    preload_models(
        text_use_small=True,
        coarse_use_small=True,
        fine_use_small=True,
    )

    # Summarize the abstract and turn the summary into audio samples.
    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = bark_generate_audio(text_prompt)

    # Reserve a temporary file name, then write after the handle is closed
    # so the file is not opened twice at once.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    write_wav(wav_file_path, SAMPLE_RATE, audio_array)

    return wav_file_path
85
+
86
+
87
+
88
# Text shown in the Gradio UI: title, description, and clickable examples.
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. We only accept pdf formats"
examples = [
    [pdf_path]
    for pdf_path in ("path/to/pdf/file1.pdf", "path/to/pdf/file2.pdf")
]

# Build the interface (a text box holding the PDF path in, audio out) and
# start serving it.
demo = gr.Interface(
    generate_audio,
    "text",
    "audio",
    examples=examples,
    title=app_name,
    description=app_description,
)
demo.launch()