Spaces:

Lenylvt
/

BetterWhisper

Sleeping

App Files Files Community

Lenylvt committed on May 9

Commit

998a321

•

1 Parent(s): 2d27b8f

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -34

app.py CHANGED Viewed

@@ -1,51 +1,60 @@
 import streamlit as st
 from gradio_client import Client
-st.title("Whisper-JAX Speech-to-Text App")
-# Specify the API URL
-API_URL = "https://sanchit-gandhi-whisper-jax.hf.space"
-# Initialize the Gradio client with the API URL
 client = Client(API_URL)
-# Function to transcribe audio using the specified API endpoint
-def transcribe_audio(audio_path="temp.mp3", task="transcription", return_timestamps=False):
-    """Function to transcribe an audio file using the Whisper-JAX endpoint."""
-    # Making a synchronous call to the predict method
-    # Note that file needs to be passed as a tuple with the format: (filename, filedata)
-    text, runtime = client.predict(
-        ("file", open(audio_path, "rb")),  # Opening file in binary read mode
         task,
         return_timestamps,
-        api_name="/predict_1"  # Ensure this is the correct endpoint
     )
-    return text, runtime
-# Streamlit widget to upload an audio file
-uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'ogg'])
-# Options for the task and timestamp inclusion
-task = st.radio("Choose a task", ["Transcription", "Translation"], index=0)
-return_timestamps = st.checkbox("Return timestamps with transcription")
-# Button to process the audio file
-if st.button("Transcribe Audio"):
-    if uploaded_file is not None:
-        # Save uploaded file temporarily
-        file_path = f"temp.mp3"
-        with open(file_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-        # Call the transcribe function
         try:
-            transcription = transcribe_audio()
-            st.write("Transcription:", transcription)
         except Exception as e:
-            st.error(f"An error occurred during transcription: {str(e)}")
         finally:
-            # Clean up the temporary file
-            import os
-            os.remove(file_path)
     else:
-        st.error("Please upload an audio file to proceed.")

 import streamlit as st
 from gradio_client import Client
+import re
+import os
+st.title("Application de transcription Whisper-JAX 🎙️")
+# Spécifiez l'URL de l'API
+API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"
+# Initialisez le client Gradio avec l'URL de l'API
 client = Client(API_URL)
+# Fonction pour transcrire un fichier audio en utilisant le point d'API spécifié
+def transcrire_audio(chemin_audio, task="transcription", return_timestamps=True):
+    """Fonction pour transcrire un fichier audio en utilisant le point d'API Whisper-JAX."""
+    # Appel synchrone à la méthode de prédiction
+    # Notez que le fichier doit être passé sous forme de tuple avec le format : (nom_fichier, données_fichier)
+    texte, duree = client.predict(
+        ("file", open(chemin_audio, "rb")),  # Ouverture du fichier en mode lecture binaire
         task,
         return_timestamps,
+        api_name="/predict_1"  # Assurez-vous que c'est le bon endpoint
     )
+    return texte, duree
+# Widget Streamlit pour télécharger un fichier audio
+fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])
+# Bouton pour traiter le fichier audio
+if st.button("Transcrire l'audio"):
+    if fichier_telecharge is not None:
+        # Enregistrez le fichier téléchargé temporairement
+        chemin_fichier = f"temp_{fichier_telecharge.name}"
+        with open(chemin_fichier, "wb") as f:
+            f.write(fichier_telecharge.getbuffer())
+        # Appel de la fonction de transcription
         try:
+            transcription, temps_traitement = transcrire_audio(chemin_fichier)
+            st.write("Transcription avec horodatage :", transcription)
+            # Affichage de la transcription sans horodatages
+            transcription_sans_horodatages = remove_timestamps(transcription)
+            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
         except Exception as e:
+            st.error(f"Une erreur est survenue lors de la transcription : {str(e)}")
         finally:
+            # Nettoyage du fichier temporaire
+            os.remove(chemin_fichier)
     else:
+        st.error("Veuillez télécharger un fichier audio pour continuer.")
+# Fonction pour supprimer les horodatages du texte
+def remove_timestamps(texte):
+    # Motif pour correspondre aux horodatages au format [HH:MM:SS.mmm -> HH:MM:SS.mmm]
+    motif = r"\[\d{2}:\d{2}:\d{2}\.\d{3} -> \d{2}:\d{2}:\d{2}\.\d{3}\]\s*"
+    # Remplacer les motifs correspondants par une chaîne vide
+    texte_nettoye = re.sub(motif, "", texte)
+    return texte_nettoye