|
from transformers import WhisperProcessor, WhisperForConditionalGeneration |
|
from huggingface_hub import login |
|
import gradio as gr |
|
import numpy as np |
|
import ffmpeg |
|
import torch |
|
import time |
|
import os |
|
|
|
# --- Hugging Face authentication ---------------------------------------------
# The access token is read from the "HF" environment variable; indexing
# os.environ directly means a missing variable raises KeyError at start-up.
login(
    os.environ["HF"],
    add_to_git_credential=True,
)

# --- Whisper model setup -----------------------------------------------------
model_name = "Kushtrim/whisper-large-v3-turbo-shqip"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The processor and the generation model are loaded from the same checkpoint.
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
model.to(device)
|
|
|
def transcribe_chunk(audio_chunk):
    """Transcribe one chunk of raw PCM audio with the Whisper model.

    Args:
        audio_chunk (bytes): Signed 16-bit PCM samples. They are scaled to
            floats in [-1, 1) and handed to the processor as 16 kHz audio.

    Returns:
        str: The decoded text of the chunk, or an empty string when the
        buffer does not contain at least one complete 2-byte sample.
    """
    bytes_per_sample = 2
    sample_count = len(audio_chunk) // bytes_per_sample
    if sample_count == 0:
        # Nothing to transcribe; avoid running the model on an empty input.
        return ""

    # np.frombuffer raises ValueError when the byte count is not a multiple
    # of the sample size, so read whole samples only; a dangling trailing
    # byte (e.g. from a truncated final read) is ignored.
    samples = np.frombuffer(audio_chunk, dtype=np.int16, count=sample_count)
    audio_array = samples.astype(np.float32) / 32768.0

    inputs = processor(
        audio_array, sampling_rate=16000, return_tensors="pt"
    ).to(device)

    # NOTE(review): the decoder prompt is forced to language="en" although
    # the checkpoint name contains "shqip" -- confirm this is intended.
    generated_ids = model.generate(
        inputs.input_features,
        forced_decoder_ids=processor.get_decoder_prompt_ids(language="en"),
    )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
def stream_and_transcribe_radio(stream_url, chunk_duration=5, max_chunks=60):
    """Transcribe a live radio stream chunk by chunk.

    ffmpeg decodes the stream to 16 kHz mono signed 16-bit PCM on a pipe.
    The generator reads fixed-size chunks from that pipe, transcribes each
    one and yields the log accumulated so far.

    Args:
        stream_url (str): URL of the audio stream.
        chunk_duration (int): Length of each processed chunk in seconds.
        max_chunks (int): Upper bound on the number of chunks processed
            before the generator stops. The default of 60 chunks of 5
            seconds corresponds to 5 minutes of audio.

    Yields:
        str: The cumulative transcription log, with one
        "[HH:MM:SS] text" line per chunk processed so far.
    """
    sample_rate = 16000
    bytes_per_sample = 2  # signed 16-bit PCM

    process = (
        ffmpeg.input(stream_url)
        # Raw PCM rather than a WAV container: a WAV header written at the
        # start of the pipe would otherwise be decoded as audio samples.
        .output(
            "pipe:",
            format="s16le",
            acodec="pcm_s16le",
            ac=1,
            ar=str(sample_rate),
        )
        .global_args("-loglevel", "error")
        # stderr is deliberately not piped: nothing here reads it, and an
        # unread pipe that fills up would block ffmpeg.
        .run_async(pipe_stdout=True)
    )

    chunk_size = int(chunk_duration * sample_rate * bytes_per_sample)
    transcription_log = ""
    try:
        for _ in range(max_chunks):
            audio_chunk = process.stdout.read(chunk_size)
            if not audio_chunk:
                # ffmpeg closed its end of the pipe; no more audio.
                break
            transcription = transcribe_chunk(audio_chunk)
            current_time = time.strftime("%H:%M:%S", time.localtime())
            transcription_log += f"[{current_time}] {transcription}\n"
            yield transcription_log
    finally:
        # Runs on exhaustion, on error, and when the consumer closes the
        # generator early.
        process.kill()
        process.stdout.close()
        # Reap the child so it does not linger as a defunct process.
        process.wait()
|
|
|
|
|
def transcribe_app(stream_url):
    """Start live transcription of ``stream_url`` using 5-second chunks.

    Returns:
        The generator created by ``stream_and_transcribe_radio``.
    """
    chunk_seconds = 5
    transcript_stream = stream_and_transcribe_radio(
        stream_url, chunk_duration=chunk_seconds
    )
    return transcript_stream
|
|
|
# Stream shown in the URL box and loaded into the audio player at start-up.
_DEFAULT_STREAM_URL = "https://s2.voscast.com:8825/radiodukagjini"

# Page header. Kept flush-left so the HTML lines carry no leading indentation
# inside the Markdown source.
# NOTE(review): the link below names the "-115h" checkpoint while the model
# loaded by this script has no such suffix -- confirm which one is intended.
_INTRO_MARKDOWN = """
<h1 style="text-align: center;">🎤 Transkriptim i Radios në Shqip</h1>
<p style="text-align: center;"> Ky aplikacion ju lejon të transkriptoni audio të drejtpërdrejtë nga një stream radioje për 5 minuta. Vendosni URL-në e stream-it dhe klikoni "Fillo Transkriptimin".</p>
<p style="text-align: center;">Radio Llapi: http://radio.1dhe1.com:8555/stream</p>
<p style="text-align: center;">Radio Dukagjini: https://s2.voscast.com:8825/radiodukagjini</p>
<p style="text-align: center;">Autori: Kushtrim Visoka</p>
<p style="text-align: center;">
Modeli i përdorur për transkriptim:
<a href="https://huggingface.co/Kushtrim/whisper-large-v3-turbo-shqip-115h" target="_blank">Kushtrim/whisper-large-v3-turbo-shqip-115h</a>
</p>
"""


def update_transcription(stream_url):
    """Yield the growing transcription log for the given stream URL.

    Delegates to ``transcribe_app``; written as a generator so each yielded
    value can replace the contents of the output textbox.
    """
    yield from transcribe_app(stream_url)


def update_audio_player(stream_url):
    """Return the HTML snippet for an audio player pointing at ``stream_url``.

    The URL comes from a user-editable textbox, so it is HTML-escaped before
    being placed inside the ``src`` attribute.
    """
    import html

    safe_url = html.escape(str(stream_url), quote=True)
    return f"""
    <div style="text-align: center; margin-top: 20px;">
        <audio controls autoplay style="width: 80%; max-width: 600px;">
            <source src="{safe_url}" type="audio/mpeg">
            Shfletuesi juaj nuk e mbështet elementin audio.
        </audio>
    </div>
    """


with gr.Blocks() as app:
    gr.Markdown(_INTRO_MARKDOWN)

    stream_url_input = gr.Textbox(
        label="Vendosni URL-në e stream-it audio",
        value=_DEFAULT_STREAM_URL,
    )
    transcribe_button = gr.Button("Fillo Transkriptimin")

    # The initial player uses the same template as later updates so the two
    # cannot drift apart.
    audio_player = gr.HTML(update_audio_player(_DEFAULT_STREAM_URL))

    transcription_output = gr.Textbox(
        label="Dalja e Transkriptimit",
        lines=20,
        interactive=False,
    )

    # Clicking the button streams the transcription log into the output box.
    transcribe_button.click(
        update_transcription,
        inputs=stream_url_input,
        outputs=transcription_output,
    )

    # Editing the URL re-renders the audio player for the new stream.
    stream_url_input.change(
        update_audio_player,
        inputs=stream_url_input,
        outputs=audio_player,
    )

app.launch()
|
|