# Mrlongpro's picture
# Rename app.py to app1.py
# 1ffb08e verified
import gradio as gr
import whisper # Library for speech recognition
from transformers import pipeline
import pandas as pd
# Speech-to-text: the "base" Whisper checkpoint, loaded once when the module is imported
WHISPER_CHECKPOINT = "base"
whisper_model = whisper.load_model(WHISPER_CHECKPOINT)

# Summarization: a Hugging Face pipeline backed by the google/pegasus-large checkpoint
SUMMARIZER_CHECKPOINT = "google/pegasus-large"
summarization = pipeline("summarization", model=SUMMARIZER_CHECKPOINT)
def process_audio(audio_file, min_length, max_length):
    """Transcribe an audio file and summarize the transcript.

    The transcript and summary are also written to ``results.csv`` in the
    current working directory (the file is overwritten on every call).

    Args:
        audio_file: Path to the uploaded audio file, or ``None`` when nothing
            was uploaded.
        min_length: Minimum summary length passed to the summarization
            pipeline. Coerced to ``int``.
        max_length: Maximum summary length passed to the summarization
            pipeline. Coerced to ``int``.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On any failure both
        elements contain the same ``"An error occurred: ..."`` message, so the
        function never raises to the caller.
    """
    try:
        # Ensure audio_file is not None and has valid content
        if audio_file is None:
            raise ValueError("No audio file provided.")

        # UI sliders may deliver floats; generation lengths must be integers.
        min_length = int(min_length)
        max_length = int(max_length)

        # Fail fast on an impossible length range before the costly transcription.
        if min_length > max_length:
            raise ValueError(
                f"Minimum summary length ({min_length}) cannot exceed "
                f"maximum summary length ({max_length})."
            )

        # Use the Whisper model to transcribe the audio file into text
        result = whisper_model.transcribe(audio_file)
        text = result['text']

        # Check if transcription was successful (whitespace-only counts as empty)
        if not text or not text.strip():
            raise ValueError("Failed to transcribe the audio. The transcription result is empty.")

        # Summarize the transcript. truncation=True clips transcripts that exceed
        # the model's maximum input length instead of letting the pipeline fail;
        # text beyond that limit is not reflected in the summary.
        summary_result = summarization(
            text,
            min_length=min_length,
            max_length=max_length,
            truncation=True,
        )
        summary = summary_result[0]['summary_text']

        # Check if summarization was successful
        if not summary:
            raise ValueError("Failed to summarize the transcript. The summary result is empty.")

        # Create a DataFrame to store the audio file, transcript, and summary
        df_results = pd.DataFrame({
            "Audio File": [audio_file],  # Store the path to the audio file
            "Transcript": [text],  # Store the transcribed text
            "Summary": [summary],  # Store the generated summary
        })

        # Save the results to a CSV file named "results.csv"
        df_results.to_csv("results.csv", index=False)

        # Return the transcript and summary to be displayed in the Gradio interface
        return text, summary
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in both
        # output fields rather than raising.
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message
# Input widgets, listed in the same order as process_audio's parameters
audio_upload = gr.Audio(sources="upload", type="filepath", label="Upload your audio file")
min_length_slider = gr.Slider(minimum=10, maximum=50, value=30, label="Minimum Summary Length")
max_length_slider = gr.Slider(minimum=50, maximum=600, value=100, label="Maximum Summary Length")

# Output widgets, listed in the same order as process_audio's return values
transcript_box = gr.Textbox(label="Transcript")
summary_box = gr.Textbox(label="Summary")

# Wire the widgets to process_audio in a single Gradio interface
iface = gr.Interface(
    fn=process_audio,
    inputs=[audio_upload, min_length_slider, max_length_slider],
    outputs=[transcript_box, summary_box],
    title="Audio to Summarized Transcript",
    description="Upload an audio file and adjust summary length to get both the transcript and summary.",
)

# Start the web app
iface.launch()