Spaces:

amendolajine
/

OPIT

Running

App Files Files Community

OPIT / app.py

amendolajine

Update app.py

f0137b9 over 1 year ago

raw

history blame contribute delete

4.2 kB

	# https://huggingface.co/spaces/amendolajine/OPIT

	# Here are the imports
	import logging
	import gradio as gr
	import fitz # PyMuPDF
	from transformers import BartTokenizer, BartForConditionalGeneration, pipeline
	import scipy.io.wavfile
	import numpy as np

	# Here is the code

	# Configure the root logger: DEBUG and above, each record stamped with
	# time and severity.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.DEBUG,
	)

	# Load the 'facebook/bart-large-cnn' tokenizer/model pair used for
	# summarisation and the 'suno/bark' text-to-speech pipeline. These are
	# module-level statements, so they run once when the module is imported.
	tokenizer = BartTokenizer.from_pretrained(
	    'facebook/bart-large-cnn',
	)
	model = BartForConditionalGeneration.from_pretrained(
	    'facebook/bart-large-cnn',
	)
	synthesiser = pipeline(task="text-to-speech", model="suno/bark")

	def extract_abstract(pdf_bytes):
	    """Return the abstract found on the first page of a PDF.

	    The abstract is taken to be the text starting at the first
	    case-insensitive occurrence of "abstract" and ending just before the
	    next occurrence of "introduction" on the same page.

	    Args:
	        pdf_bytes: Raw bytes of the PDF document.

	    Returns:
	        The stripped abstract text on success. On failure a human-readable
	        message is returned instead of raising:
	        "Abstract not found or 'Introduction' not found in the first page."
	        when either marker is missing, or "Error in abstract extraction"
	        when the PDF cannot be opened or read.
	    """
	    try:
	        # Use the document as a context manager so it is closed even when
	        # reading the page fails.
	        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	            first_page = doc[0].get_text()
	    except Exception as e:
	        logging.error(f"Error extracting abstract: {e}")
	        return "Error in abstract extraction"

	    lowered = first_page.lower()
	    start_idx = lowered.find("abstract")
	    if start_idx != -1:
	        # Look for "introduction" only *after* the abstract heading;
	        # otherwise an earlier occurrence (e.g. in the title) would yield
	        # an empty slice.
	        end_idx = lowered.find("introduction", start_idx + len("abstract"))
	        if end_idx != -1:
	            return first_page[start_idx:end_idx].strip()

	    return "Abstract not found or 'Introduction' not found in the first page."

	# Messages extract_abstract() returns in place of an abstract when it
	# fails. They must be shown to the user, not summarised and read aloud.
	_EXTRACTION_FAILURES = (
	    "Abstract not found or 'Introduction' not found in the first page.",
	    "Error in abstract extraction",
	)


	def _clean_summary(summary):
	    """Merge a multi-sentence summary into one run-on sentence for speech.

	    Words ending in a hyphen are glued to the following word, sentence
	    breaks (a trailing period on any word but the last) are replaced by
	    "and", word-initial letters are lowercased, and finally the first
	    letter of the result is capitalised. Returns "" for an empty summary.
	    """
	    words = summary.split()
	    pieces = []
	    i = 0
	    while i < len(words):
	        word = words[i]
	        i += 1
	        # Only a *trailing* hyphen marks a split word; inner hyphens
	        # ("state-of-the-art") are left alone.
	        if word.endswith('-') and i < len(words):
	            word = word[:-1] + words[i]
	            i += 1
	        # `i` now points past everything consumed, so `i < len(words)` is
	        # true exactly when more text follows. This keeps the final period
	        # and avoids a dangling "and". Inner periods ("3.5") are kept.
	        if word.endswith('.') and i < len(words):
	            pieces.append(word.rstrip('.'))
	            pieces.append('and')
	        else:
	            pieces.append(word)

	    # A lone "." or "-" can leave an empty piece behind; drop those.
	    pieces = [p for p in pieces if p]
	    # NOTE(review): every word's first letter is lowercased, as in the
	    # original code; this also affects proper nouns and acronyms.
	    lowered = [p if p.lower() == 'and' else p[0].lower() + p[1:] for p in pieces]
	    sentence = ' '.join(lowered)
	    # Capitalise last, so the lowercasing pass above cannot undo it.
	    return sentence[:1].upper() + sentence[1:]


	def _to_int16_pcm(audio_data):
	    """Peak-normalise a float waveform and convert it to 16-bit PCM."""
	    peak = np.max(np.abs(audio_data)) if audio_data.size else 0.0
	    if peak == 0:
	        # Silent or empty audio: dividing by the peak would produce NaNs.
	        return np.zeros(audio_data.shape, dtype=np.int16)
	    return (audio_data / peak * 32767).astype(np.int16)


	def process_text(uploaded_file):
	    """Summarise the abstract of an uploaded PDF and synthesise it as speech.

	    Args:
	        uploaded_file: The upload as delivered by the Gradio File input.
	            Either a filesystem path or an object exposing the path as
	            `.name` is accepted.

	    Returns:
	        A `(text, audio_path)` tuple. On success `text` is the cleaned
	        summary and `audio_path` is the WAV file written to
	        "temp_output.wav". On any failure `text` is an explanatory message
	        and `audio_path` is None.
	    """
	    logging.debug(f"Uploaded file type: {type(uploaded_file)}")
	    logging.debug(f"Uploaded file content: {uploaded_file}")

	    # Depending on the Gradio version the File component may hand over a
	    # path string or a temp-file wrapper whose path is in `.name`.
	    pdf_path = getattr(uploaded_file, "name", uploaded_file)
	    try:
	        with open(pdf_path, "rb") as file:
	            pdf_bytes = file.read()
	    except Exception as e:
	        logging.error(f"Error reading file from path: {e}")
	        return "Error reading PDF file", None

	    try:
	        abstract_text = extract_abstract(pdf_bytes)
	        logging.info(f"Extracted abstract: {abstract_text[:200]}...")
	    except Exception as e:
	        logging.error(f"Error in abstract extraction: {e}")
	        return "Error in processing PDF", None

	    # extract_abstract() reports failure through its return value; surface
	    # that message directly instead of feeding it to the summariser.
	    if abstract_text in _EXTRACTION_FAILURES:
	        return abstract_text, None

	    try:
	        inputs = tokenizer(
	            [abstract_text],
	            max_length=1024,
	            return_tensors='pt',
	            truncation=True,
	            padding="max_length",
	        )
	        summary_ids = model.generate(
	            input_ids=inputs['input_ids'],
	            attention_mask=inputs['attention_mask'],
	            pad_token_id=model.config.pad_token_id,
	            num_beams=4,
	            max_length=45,
	            min_length=10,
	            length_penalty=2.0,
	            early_stopping=True,
	            no_repeat_ngram_size=2,
	        )
	        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

	        final_summary = _clean_summary(summary)
	        if not final_summary:
	            logging.error("Error in summary generation or TTS conversion: empty summary")
	            return "Error in summary or speech generation", None

	        speech = synthesiser(final_summary, forward_params={"do_sample": True})
	        normalized_audio_data = _to_int16_pcm(speech["audio"].squeeze())

	        output_file = "temp_output.wav"
	        scipy.io.wavfile.write(output_file, rate=speech["sampling_rate"], data=normalized_audio_data)

	        return final_summary, output_file
	    except Exception as e:
	        logging.error(f"Error in summary generation or TTS conversion: {e}")
	        return "Error in summary or speech generation", None

	# Gradio front end: a single file upload is passed to process_text, whose
	# two return values are shown as a text box and an audio player.
	iface = gr.Interface(
	    title="Summarize an abstract and vocalize it",
	    description=(
	        "Upload a research paper in PDF format to extract, summarize its abstract, "
	        "and convert the summarization to speech. If the upload doesn't work on the "
	        "first try, refresh the page (CTRL+F5) and try again."
	    ),
	    fn=process_text,
	    inputs=gr.components.File(label="Upload a research PDF containing an abstract"),
	    outputs=["text", "audio"],
	)

	# Launch the interface only when this file is run as a script, not when it
	# is imported.
	if __name__ == "__main__":
	    iface.launch()