# Spaces: Sleeping, Sleeping (appears to be status text scraped from the hosting page; not program code)
import base64
import os
import re

import streamlit as st
from gradio_client import Client
# Base URL of the hosted Gradio application that performs the transcription.
API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

# Page heading shown at the top of the Streamlit app.
st.title("Application de transcription Whisper-JAX 🎙️")

# Gradio client bound to API_URL; used by the transcription helper below.
client = Client(API_URL)

# Ask the client for the API description as a dict. The returned value is
# not stored or displayed here.
client.view_api(return_format="dict")
# Transcription helper built on the module-level Gradio client.
def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
    """Send audio bytes to the "/predict_1" endpoint and return its first two outputs.

    Args:
        file_data: Bytes of the audio file; base64-encoded before sending.
        task: Forwarded unchanged as the second positional argument of
            ``client.predict``.
        return_timestamps: Forwarded unchanged as the third positional
            argument of ``client.predict``.

    Returns:
        The tuple ``(response[0], response[1])``. The caller in this file
        unpacks it as ``(transcription, runtime)``.
    """
    # NOTE(review): gradio_client normally expects a file path or URL for
    # audio inputs rather than a bare base64 string -- confirm that this
    # endpoint accepts the payload in this form.
    payload = base64.b64encode(file_data).decode("utf-8")

    # NOTE(review): "/predict_1" and the two-element result layout are
    # assumptions about the remote API -- verify against the API description.
    result = client.predict(payload, task, return_timestamps, api_name="/predict_1")
    return result[0], result[1]
# Defined before the UI code below: Streamlit executes this script from top
# to bottom on every interaction, so the helper must already be bound when
# the button handler calls it.
def remove_timestamps(text):
    """Return ``text`` with every ``[start -> end]`` timestamp marker removed.

    Each marker has the form ``[HH:MM:SS.mmm -> HH:MM:SS.mmm]``; the ``HH:``
    part is optional on either side, so ``[MM:SS.mmm -> MM:SS.mmm]`` is
    stripped as well. Whitespace directly after a marker is removed with it.

    Args:
        text: Transcription string that may contain timestamp markers.

    Returns:
        The string with all matching markers replaced by an empty string.
    """
    # One timestamp: optional two-digit hours, then MM:SS.mmm.
    stamp = r"(?:\d{2}:)?\d{2}:\d{2}\.\d{3}"
    pattern = r"\[" + stamp + r" -> " + stamp + r"\]\s*"
    return re.sub(pattern, "", text)


# Streamlit widget to upload an audio file
fichier_telecharge = st.file_uploader(
    "Choisissez un fichier audio", type=["mp3", "wav", "ogg"]
)

# Button to process the audio file
if st.button("Transcrire l'audio"):
    if fichier_telecharge is None:
        st.error("Veuillez télécharger un fichier audio pour continuer.")
    else:
        # Read the uploaded file into memory as bytes
        file_data = fichier_telecharge.getvalue()
        # UI boundary: any failure during transcription or display is shown
        # to the user instead of crashing the page.
        try:
            transcription, _runtime = transcrire_audio(file_data)
            st.write("Transcription avec horodatage :", transcription)
            # Display transcription without timestamps
            transcription_sans_horodatages = remove_timestamps(transcription)
            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
        except Exception as e:
            st.error(f"Une erreur est survenue lors de la transcription : {e}")