BetterWhisper / app.py
Lenylvt's picture
Update app.py
9846a71 verified
raw
history blame
2.31 kB
import streamlit as st
from gradio_client import Client
import re
import os
import base64
# Page title rendered at the top of the Streamlit app.
st.title("Application de transcription Whisper-JAX 🎙️")

# Base URL of the remote Gradio app that performs the transcription.
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

# Module-level Gradio client; transcrire_audio() sends its requests through it.
client = Client(API_URL)

# Ask the client for the API description in dict form. The returned value is
# discarded here.
# NOTE(review): presumably left over from debugging/endpoint discovery --
# confirm nothing relies on this call before removing it.
client.view_api(return_format="dict")
# Function to transcribe an audio file using the specified API endpoint
def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
    """Submit audio bytes to the remote endpoint and return its first two outputs.

    Args:
        file_data: Raw bytes of the audio file.
        task: Task name forwarded unchanged to the endpoint.
        return_timestamps: Flag forwarded unchanged to the endpoint.

    Returns:
        A 2-tuple made of elements 0 and 1 of the endpoint's response. The
        caller in this file unpacks them as ``(transcription, runtime)``.

    Any exception raised by the client call propagates to the caller.
    """
    # The audio is shipped as base64 text rather than as raw bytes.
    # NOTE(review): gradio_client file/audio inputs are usually given as a
    # file path or URL -- confirm this endpoint really accepts a base64 string.
    audio_b64 = base64.b64encode(file_data).decode('utf-8')

    result = client.predict(
        audio_b64,
        task,
        return_timestamps,
        api_name="/predict_1",  # NOTE(review): verify this is the intended endpoint
    )

    # Only the first two response elements are exposed to callers.
    first_output, second_output = result[0], result[1]
    return first_output, second_output
# One timestamp as printed in the transcription: "MM:SS.mmm", optionally
# prefixed with an hours field ("HH:MM:SS.mmm").
_TIMESTAMP = r"(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}"

# A full "[start -> end]" marker plus any whitespace that follows it.
# "None" is accepted as the end value.
# NOTE(review): the upstream app appears to print "None" when a chunk has no
# end time -- confirm against the Space's source.
_TIMESTAMP_RANGE = re.compile(
    r"\[" + _TIMESTAMP + r" -> (?:" + _TIMESTAMP + r"|None)\]\s*"
)


# Defined BEFORE the UI code below: Streamlit executes the script from top to
# bottom on every rerun, so a helper defined after its call site would not
# exist yet when the button handler runs (NameError).
def remove_timestamps(text):
    """Return *text* with every ``[start -> end]`` timestamp marker removed.

    Both ``[MM:SS.mmm -> MM:SS.mmm]`` and ``[HH:MM:SS.mmm -> HH:MM:SS.mmm]``
    markers are stripped, together with the whitespace that follows each one.
    Text without markers is returned unchanged.

    Args:
        text: Transcription text, possibly containing timestamp markers.

    Returns:
        The text with all timestamp markers removed.
    """
    return _TIMESTAMP_RANGE.sub("", text)


# Streamlit widget to upload an audio file
fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])

# Button to process the audio file
if st.button("Transcrire l'audio"):
    if fichier_telecharge is None:
        # Nothing uploaded yet: ask the user for a file instead of calling the API.
        st.error("Veuillez télécharger un fichier audio pour continuer.")
    else:
        # Read the uploaded file into memory
        file_data = fichier_telecharge.getvalue()
        try:
            transcription, runtime = transcrire_audio(file_data)
            st.write("Transcription avec horodatage :", transcription)
            # Also show the same transcription with the timestamp markers stripped
            transcription_sans_horodatages = remove_timestamps(transcription)
            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
        except Exception as e:
            # UI boundary: report any failure to the user instead of crashing the page.
            st.error(f"Une erreur est survenue lors de la transcription : {e}")