import os

import gradio as gr
import torch
from pyannote.audio import Pipeline

HF_TOKEN = os.environ.get("HF_TOKEN")

# Set up the diarization pipeline (a gated model, so an HF token with
# access to pyannote/speaker-diarization-3.1 is required).
diarization_pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN
)
if torch.cuda.is_available():
    diarization_pipeline.to(torch.device("cuda"))


def transcribe(audio_path, num_speakers=2):
    # Run diarization, constraining the pipeline to the requested number
    # of speakers (previously hardcoded to 2).
    diarization = diarization_pipeline(audio_path, num_speakers=num_speakers)
    # Render each speech turn as "start --> end: speaker" so the result
    # can be displayed in a Textbox.
    return "\n".join(
        f"{turn.start:.1f}s --> {turn.end:.1f}s: {speaker}"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    )


title = "SAML Speaker Diarization ⚡️"
description = """pyannote speaker diarization running locally"""
article = """SAMLOne Speaker Segmentation or Diarization"""

with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    gr.Markdown(f"# {title}\n{description}")
    audio_input = gr.Audio(type="filepath", label="Audio file")
    text_output = gr.Textbox(label="Speaker diarization")
    speaker_diarization_button = gr.Button("Submit")
    speaker_diarization_button.click(
        fn=transcribe, inputs=[audio_input], outputs=[text_output]
    )
    gr.Markdown(article)

demo.launch(debug=True)
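
# A minimal sketch of how the speaker count could be exposed in the UI,
# reusing the `transcribe` function above; the `gr.Slider` component and
# its 1-10 range are illustrative assumptions, not part of the original
# app. Uncomment and use in place of the Blocks layout above to try it.
#
# with gr.Blocks(theme="rawrsor1/Everforest") as demo:
#     audio_input = gr.Audio(type="filepath", label="Audio file")
#     num_speakers_input = gr.Slider(
#         minimum=1, maximum=10, value=2, step=1, label="Number of speakers"
#     )
#     text_output = gr.Textbox(label="Speaker diarization")
#     gr.Button("Submit").click(
#         fn=transcribe,
#         inputs=[audio_input, num_speakers_input],
#         outputs=[text_output],
#     )
# demo.launch(debug=True)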