Spaces:

Lenylvt
/

BetterWhisper

Sleeping

App Files Files Community

BetterWhisper / app.py

Lenylvt

Update app.py

9846a71 verified 7 months ago

raw

history blame

2.31 kB

	import streamlit as st
	from gradio_client import Client
	import re
	import os
	import base64

	# Page heading rendered at the top of the Streamlit app.
	st.title("Application de transcription Whisper-JAX 🎙️")

	# Base URL handed to the Gradio client below.
	API_URL = "https://sanchit-gandhi-whisper-jax-spaces.hf.space"

	# Module-level Gradio client bound to API_URL; transcrire_audio() sends its
	# prediction requests through this object.
	client = Client(API_URL)
	# NOTE(review): the value returned by view_api() is discarded, so this call
	# has no visible effect in this file -- confirm whether it is still needed.
	client.view_api(return_format="dict")

	# Function to transcribe an audio file using the specified API endpoint
	def transcrire_audio(file_data, task="transcribe", return_timestamps=True):
	    """Submit audio bytes to the remote ``/predict_1`` endpoint.

	    The bytes are base64-encoded into a UTF-8 string and passed, together
	    with ``task`` and ``return_timestamps``, to the module-level Gradio
	    ``client``.

	    Args:
	        file_data: Raw bytes of the audio file.
	        task: Task name forwarded unchanged to the endpoint.
	        return_timestamps: Flag forwarded unchanged to the endpoint.

	    Returns:
	        A 2-tuple made of the first two items of the endpoint response.
	        The caller in this file unpacks them as ``(transcription, runtime)``.
	    """
	    # NOTE(review): gradio_client usually takes file inputs as a path/URL;
	    # confirm that this endpoint really accepts a bare base64 string.
	    payload = base64.b64encode(file_data).decode("utf-8")

	    # The endpoint name is hard-coded; it must match what the remote app exposes.
	    result = client.predict(payload, task, return_timestamps, api_name="/predict_1")

	    first_item, second_item = result[0], result[1]
	    return first_item, second_item

	# NOTE: this helper must be defined *before* the UI flow below. Streamlit
	# executes the script top to bottom on every rerun, so a function defined
	# after the button handler does not exist yet when the handler runs.
	def remove_timestamps(text):
	    """Strip leading ``[start -> end]`` timestamp markers from a transcript.

	    Both ``[HH:MM:SS.mmm -> HH:MM:SS.mmm]`` and the shorter
	    ``[MM:SS.mmm -> MM:SS.mmm]`` form (hour field omitted) are removed,
	    along with any whitespace that immediately follows the marker.

	    Args:
	        text: Transcript text that may contain timestamp markers.

	    Returns:
	        The text with every matched marker removed; text without markers
	        is returned unchanged.
	    """
	    # One timestamp: optional "HH:" prefix, then "MM:SS.mmm".
	    stamp = r"(?:\d{2}:)?\d{2}:\d{2}\.\d{3}"
	    pattern = r"\[" + stamp + r" -> " + stamp + r"\]\s*"
	    return re.sub(pattern, "", text)


	# Streamlit widget to upload an audio file
	fichier_telecharge = st.file_uploader("Choisissez un fichier audio", type=['mp3', 'wav', 'ogg'])

	# Button to process the audio file
	if st.button("Transcrire l'audio"):
	    if fichier_telecharge is None:
	        st.error("Veuillez télécharger un fichier audio pour continuer.")
	    else:
	        # Read the whole upload into memory as bytes.
	        file_data = fichier_telecharge.getvalue()

	        # Any failure (remote call or post-processing) is reported in the UI
	        # instead of crashing the app.
	        try:
	            # The second element (runtime) is returned by the API but not shown.
	            transcription, _runtime = transcrire_audio(file_data)
	            st.write("Transcription avec horodatage :", transcription)

	            # Display transcription without timestamps
	            transcription_sans_horodatages = remove_timestamps(transcription)
	            st.write("Transcription sans horodatage :", transcription_sans_horodatages)
	        except Exception as e:
	            st.error(f"Une erreur est survenue lors de la transcription : {e}")