Persian_Piper_TTS

Running

File size: 1,653 Bytes

e4eb5c5
944dedf
 
 
 
4492d6d
19be65d
a82f51b
 
3b61a4e
 
3e38fbb
a82f51b
4eb15f6
944dedf
8ce2dae
19be65d
f6a94c1
 
 
 
 
 
 
 
544fdea
 
f6a94c1
 
 
 
 
 
944dedf
1782e10
 
9e22b76
 
 
f6a94c1
ab3b67e
1782e10
 
ab3b67e
944dedf
f6a94c1

import gradio as gr
import wave
import numpy as np
from io import BytesIO
from huggingface_hub import hf_hub_download
from piper import PiperVoice 
from transformers import pipeline
import typing

# Fetch the voice's ONNX model and its JSON config (in that order) with
# hf_hub_download, then load them into the module-level Piper voice that
# synthesize_speech() uses.
model_path, config_path = (
    hf_hub_download(repo_id="gyroing/Persian-Piper-Model-gyro", filename=name)
    for name in ("fa_IR-gyro-medium.onnx", "fa_IR-gyro-medium.onnx.json")
)
voice = PiperVoice.load(model_path, config_path)


def synthesize_speech(
    text: typing.Optional[str],
) -> typing.Tuple[typing.Optional[bytes], None]:
    """Synthesize speech for *text* with the module-level Piper ``voice``.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            treated as "nothing to synthesize".

    Returns:
        An ``(audio, message)`` pair, one item per Gradio output wired to
        this callback. ``audio`` is the content of an in-memory WAV file
        (16-bit, mono, at ``voice.config.sample_rate``) as ``bytes``, or
        ``None`` when there is nothing to synthesize. ``message`` is always
        ``None``.
    """
    # Nothing to say: return no audio instead of running the synthesizer
    # on empty input.
    if not text or not text.strip():
        return None, None

    # Build the WAV file in memory rather than on disk.
    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono

        # NOTE(review): newer piper-tts releases appear to write into a wave
        # file through synthesize_wav(), with synthesize() turned into a lazy
        # chunk generator; older releases accept the wave file directly in
        # synthesize(). Prefer synthesize_wav() when the loaded voice has it
        # so both API generations produce audio -- confirm against the
        # pinned piper version.
        write_wav = getattr(voice, "synthesize_wav", None)
        if callable(write_wav):
            write_wav(text, wav_file)
        else:
            voice.synthesize(text, wav_file)

    # The buffer already holds the finished WAV file; hand its bytes back
    # as-is. (Decoding them as int16 first would only reinterpret the RIFF
    # header as samples and then undo that again.)
    return buffer.getvalue(), None

# Gradio UI: a text box in, synthesized audio out
with gr.Blocks(theme=gr.themes.Base()) as blocks:
    # Page heading and a one-line usage hint
    gr.Markdown("# Persian Text to Speech Synthesizer")
    gr.Markdown("Enter text to synthesize it into speech using Piper+Hazm")

    # Widgets, in the order they are created
    input_text = gr.Textbox(label="ورود متن")
    output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
    # Hidden text box that receives the callback's second return value
    output_text = gr.Textbox(label="Output Text", visible=False)
    submit_button = gr.Button("Synthesize")

    # Pressing the button runs the synthesizer on the entered text
    submit_button.click(
        fn=synthesize_speech,
        inputs=input_text,
        outputs=[output_audio, output_text],
    )

# Start serving the interface
blocks.launch()