Upload 8 files

- .gitignore +6 -0
- Kokoro_82M_Colab.ipynb +51 -0
- README.md +124 -11
- api.py +76 -0
- app.py +262 -0
- download_model.py +174 -0
- requirements.txt +14 -0
- srt_dubbing.py +557 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
+kokoro_audio/
+KOKORO/voices/
+cache/
+__pycache__/
+run_app.bat
+*.pth
    	
Kokoro_82M_Colab.ipynb ADDED
@@ -0,0 +1,51 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "%cd /content/\n",
+        "!git clone https://github.com/NeuralFalconYT/Kokoro-82M-WebUI.git\n",
+        "!apt-get -qq -y install espeak-ng > /dev/null 2>&1\n",
+        "%cd /content/Kokoro-82M-WebUI\n",
+        "!python download_model.py\n",
+        "!pip install -r requirements.txt\n",
+        "from IPython.display import clear_output\n",
+        "clear_output()"
+      ],
+      "metadata": {
+        "id": "stDJD3G4KJwP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%cd /content/Kokoro-82M-WebUI\n",
+        "!python app.py --share\n",
+        "# !python srt_dubbing.py --share"
+      ],
+      "metadata": {
+        "id": "XSQ2ShKtC1u9"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
    	
README.md CHANGED
@@ -1,14 +1,127 @@
+# Kokoro-TTS
+
+**Note:** This is not the official repository. Alternatives: [kokoro-onnx](https://github.com/thewh1teagle/kokoro-onnx), [Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI), [kokoro](https://github.com/hexgrad/kokoro), [kokoro-web](https://huggingface.co/spaces/webml-community/kokoro-web), [Kokoro-Custom-Voice](https://huggingface.co/spaces/ysharma/Make_Custom_Voices_With_KokoroTTS)
+
+[Open in Colab](https://colab.research.google.com/github/NeuralFalconYT/Kokoro-82M-WebUI/blob/main/Kokoro_82M_Colab.ipynb) <br>
+[Hugging Face Space](https://huggingface.co/spaces/hexgrad/Kokoro-TTS)
+
+
 ---
-
-
-
-
-
-
-
-
-
-
+
+### Installation Tutorial
+
+My Python version is 3.10.9.
+
+#### 1. Clone the GitHub Repository:
+```bash
+git clone https://github.com/NeuralFalconYT/Kokoro-82M-WebUI.git
+cd Kokoro-82M-WebUI
+```
+
+#### 2. Create a Python Virtual Environment:
+```bash
+python -m venv myenv
+```
+This command creates a new Python virtual environment named `myenv` for isolating dependencies.
+
+#### 3. Activate the Virtual Environment:
+- **For Windows:**
+  ```bash
+  myenv\Scripts\activate
+  ```
+- **For Linux:**
+  ```bash
+  source myenv/bin/activate
+  ```
+This activates the virtual environment, enabling you to install and run dependencies in an isolated environment.
+
+
+
+#### 4. Install PyTorch:
+
+- **For GPU (CUDA-enabled installation):**
+  - Check your CUDA version (for GPU setup):
+    ```bash
+    nvcc --version
+    ```
+    Find your CUDA version, for example `11.8`.
+
+  - Visit [PyTorch Get Started](https://pytorch.org/get-started/locally/) and install the version compatible with your CUDA setup:<br>
+    - For CUDA 11.8:
+    ```bash
+    pip install torch --index-url https://download.pytorch.org/whl/cu118
+    ```
+    - For CUDA 12.1:
+    ```bash
+    pip install torch --index-url https://download.pytorch.org/whl/cu121
+    ```
+    - For CUDA 12.4:
+    ```bash
+    pip install torch --index-url https://download.pytorch.org/whl/cu124
+    ```
+- **For CPU (if not using GPU):**
+  ```bash
+  pip install torch
+  ```
+  This installs the CPU-only version of PyTorch.
+
+
+#### 5. Install Required Dependencies:
+```bash
+pip install -r requirements.txt
+```
+This installs all the required Python libraries listed in the `requirements.txt` file.
+
+#### 6. Download Model and Get Latest VoicePack:
+```bash
+python download_model.py
+```
+
+---
+
+#### 7. Install eSpeak NG
+
+- **For Windows:**
+  1. Download the latest eSpeak NG release from the [eSpeak NG GitHub Releases](https://github.com/espeak-ng/espeak-ng/releases/tag/1.51).
+  2. Locate and download the file named **`espeak-ng-X64.msi`**.
+  3. Run the installer and follow the installation steps. Ensure that you install eSpeak NG in the default directory:
+     ```
+     C:\Program Files\eSpeak NG
+     ```
+     > **Note:** This default path is required for the application to locate eSpeak NG properly.
+
+- **For Linux:**
+  1. Open your terminal.
+  2. Install eSpeak NG using the following command:
+     ```bash
+     sudo apt-get -qq -y install espeak-ng > /dev/null 2>&1
+     ```
+     > **Note:** This command suppresses unnecessary output for a cleaner installation process.
+
 ---
 
-
+#### 8. Run Gradio App
+
+To run the Gradio app, follow these steps:
+
+1. **Activate the Virtual Environment:**
+   ```bash
+   myenv\Scripts\activate
+   ```
+
+2. **Run the Application:**
+   ```bash
+   python app.py
+   ```
+
+   Alternatively, on Windows, double-click on `run_app.bat` to start the application.
+
+---
+
+
+
+
+
+### Credits
+[Kokoro HuggingFace](https://huggingface.co/hexgrad/Kokoro-82M)
+
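Two of the README steps above are easy to get subtly wrong: step 4 can leave you with a CPU-only PyTorch wheel, and step 7's eSpeak NG install only helps if the binary is actually reachable. A minimal sanity check to run inside the activated environment (a sketch, not part of this upload; note that `shutil.which` only confirms `espeak-ng` is on `PATH`, while on Windows the app instead looks in the default `C:\Program Files\eSpeak NG` directory):

```python
# Environment sanity check for README steps 4 and 7 (illustrative only).
import shutil
import torch

print("torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())  # False usually means the CPU-only wheel was installed
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))

# On Linux this should print the path to the espeak-ng binary;
# on Windows, None here is not fatal as long as the default install path exists.
print("espeak-ng on PATH:", shutil.which("espeak-ng"))
```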
    	
api.py ADDED
@@ -0,0 +1,76 @@
+# This client script is helpful if you want to use Kokoro TTS in a voice assistant project.
+# Learn more at {your gradio app url}/?view=api, for example http://127.0.0.1:7860/?view=api
+import shutil
+import os
+from gradio_client import Client
+
+# Ensure the output directory exists
+output_dir = "temp_audio"
+os.makedirs(output_dir, exist_ok=True)
+
+# Initialize the Gradio client
+api_url = "http://127.0.0.1:7860/"
+client = Client(api_url)
+
+def text_to_speech(
+    text="Hello!!",
+    model_name="kokoro-v0_19.pth",
+    voice_name="af_bella",
+    speed=1,
+    trim=0,
+    pad_between_segments=0,
+    remove_silence=False,
+    minimum_silence=0.05,
+):
+    """
+    Generates speech from text using a specified model and saves the audio file.
+
+    Parameters:
+        text (str): The text to convert to speech.
+        model_name (str): The name of the model to use for synthesis.
+        voice_name (str): The name of the voice to use.
+        speed (float): The speed of speech.
+        trim (int): Whether to trim silence at the beginning and end.
+        pad_between_segments (int): Padding between audio segments.
+        remove_silence (bool): Whether to remove silence from the audio.
+        minimum_silence (float): Minimum silence duration to consider.
+    Returns:
+        str: Path to the saved audio file.
+    """
+    # Call the API with the provided parameters
+    result = client.predict(
+        text=text,
+        model_name=model_name,
+        voice_name=voice_name,
+        speed=speed,
+        trim=trim,
+        pad_between_segments=pad_between_segments,
+        remove_silence=remove_silence,
+        minimum_silence=minimum_silence,
+        api_name="/text_to_speech"
+    )
+
+    # Save the audio file in the specified directory
+    save_at = f"{output_dir}/{os.path.basename(result)}"
+    shutil.move(result, save_at)
+    print(f"Saved at {save_at}")
+
+    return save_at
+
+# Example usage
+if __name__ == "__main__":
+    text = "This is Kokoro TTS. I am a text-to-speech model and Super Fast."
+    model_name = "kokoro-v0_19.pth"  # or kokoro-v0_19-half.pth
+    voice_name = "af_bella"  # see the app's "Available Voice Names" tab for all voice names
+    speed = 1
+    only_trim_both_ends_silence = 0
+    add_silence_between_segments = 0  # useful for large text
+    remove_silence = False
+    keep_silence_upto = 0.05  # in seconds
+    audio_path = text_to_speech(text=text, model_name=model_name,
+                                voice_name=voice_name, speed=speed,
+                                trim=only_trim_both_ends_silence,
+                                pad_between_segments=add_silence_between_segments,
+                                remove_silence=remove_silence,
+                                minimum_silence=keep_silence_upto)
+    print(f"Audio file saved at: {audio_path}")
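Before wiring `api.py` into another project, it can help to confirm which endpoints the running app actually exposes and what parameters they accept. A minimal sketch using `gradio_client`, assuming `app.py` is already serving at the same URL used above:

```python
from gradio_client import Client

# Prints the named endpoints (e.g. /text_to_speech) together with their
# parameter names and defaults, the same information as the /?view=api page.
client = Client("http://127.0.0.1:7860/")
client.view_api()
```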
    	
        app.py
    ADDED
    
    | 
         @@ -0,0 +1,262 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from KOKORO.models import build_model
         
     | 
| 2 | 
         
            +
            from KOKORO.utils import tts,tts_file_name,podcast
         
     | 
| 3 | 
         
            +
            import sys
         
     | 
| 4 | 
         
            +
            sys.path.append('.')
         
     | 
| 5 | 
         
            +
            import torch
         
     | 
| 6 | 
         
            +
            import gc 
         
     | 
| 7 | 
         
            +
            print("Loading model...")
         
     | 
| 8 | 
         
            +
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
         
     | 
| 9 | 
         
            +
            print(f'Using device: {device}')
         
     | 
| 10 | 
         
            +
            MODEL = build_model('./KOKORO/kokoro-v0_19.pth', device)
         
     | 
| 11 | 
         
            +
            print("Model loaded successfully.")
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            def tts_maker(text,voice_name="af_bella",speed = 0.8,trim=0,pad_between=0,save_path="temp.wav",remove_silence=False,minimum_silence=50):
         
     | 
| 14 | 
         
            +
                # Sanitize the save_path to remove any newline characters
         
     | 
| 15 | 
         
            +
                save_path = save_path.replace('\n', '').replace('\r', '')
         
     | 
| 16 | 
         
            +
                global MODEL
         
     | 
| 17 | 
         
            +
                audio_path=tts(MODEL,device,text,voice_name,speed=speed,trim=trim,pad_between_segments=pad_between,output_file=save_path,remove_silence=remove_silence,minimum_silence=minimum_silence)
         
     | 
| 18 | 
         
            +
                return audio_path
         
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
             
     | 
| 21 | 
         
            +
            model_list = ["kokoro-v0_19.pth", "kokoro-v0_19-half.pth"]
         
     | 
| 22 | 
         
            +
            current_model = model_list[0]
         
     | 
| 23 | 
         
            +
             
     | 
| 24 | 
         
            +
            def update_model(model_name):
         
     | 
| 25 | 
         
            +
                """
         
     | 
| 26 | 
         
            +
                Updates the TTS model only if the specified model is not already loaded.
         
     | 
| 27 | 
         
            +
                """
         
     | 
| 28 | 
         
            +
                global MODEL, current_model
         
     | 
| 29 | 
         
            +
                if current_model == model_name:
         
     | 
| 30 | 
         
            +
                    return f"Model already set to {model_name}"  # No need to reload
         
     | 
| 31 | 
         
            +
                model_path = f"./KOKORO/{model_name}"  # Default model path
         
     | 
| 32 | 
         
            +
                if model_name == "kokoro-v0_19-half.pth":
         
     | 
| 33 | 
         
            +
                    model_path = f"./KOKORO/fp16/{model_name}"  # Update path for specific model
         
     | 
| 34 | 
         
            +
                # print(f"Loading new model: {model_name}")
         
     | 
| 35 | 
         
            +
                del MODEL  # Cleanup existing model
         
     | 
| 36 | 
         
            +
                gc.collect()
         
     | 
| 37 | 
         
            +
                torch.cuda.empty_cache()  # Ensure GPU memory is cleared
         
     | 
| 38 | 
         
            +
                MODEL = build_model(model_path, device)
         
     | 
| 39 | 
         
            +
                current_model = model_name
         
     | 
| 40 | 
         
            +
                return f"Model updated to {model_name}"
         
     | 
| 41 | 
         
            +
             
     | 
| 42 | 
         
            +
             
     | 
| 43 | 
         
            +
            def text_to_speech(text, model_name, voice_name, speed, trim, pad_between_segments, remove_silence, minimum_silence):
         
     | 
| 44 | 
         
            +
                """
         
     | 
| 45 | 
         
            +
                Converts text to speech using the specified parameters and ensures the model is updated only if necessary.
         
     | 
| 46 | 
         
            +
                """
         
     | 
| 47 | 
         
            +
                update_status = update_model(model_name)  # Load the model only if required
         
     | 
| 48 | 
         
            +
                # print(update_status)  # Log model loading status
         
     | 
| 49 | 
         
            +
                if not minimum_silence:
         
     | 
| 50 | 
         
            +
                    minimum_silence = 0.05
         
     | 
| 51 | 
         
            +
                keep_silence = int(minimum_silence * 1000)
         
     | 
| 52 | 
         
            +
                save_at = tts_file_name(text)
         
     | 
| 53 | 
         
            +
                audio_path = tts_maker(
         
     | 
| 54 | 
         
            +
                    text, 
         
     | 
| 55 | 
         
            +
                    voice_name, 
         
     | 
| 56 | 
         
            +
                    speed, 
         
     | 
| 57 | 
         
            +
                    trim, 
         
     | 
| 58 | 
         
            +
                    pad_between_segments, 
         
     | 
| 59 | 
         
            +
                    save_at, 
         
     | 
| 60 | 
         
            +
                    remove_silence, 
         
     | 
| 61 | 
         
            +
                    keep_silence
         
     | 
| 62 | 
         
            +
                )
         
     | 
| 63 | 
         
            +
                return audio_path
         
     | 
| 64 | 
         
            +
             
     | 
| 65 | 
         
            +
             
     | 
| 66 | 
         
            +
             
     | 
| 67 | 
         
            +
             
     | 
| 68 | 
         
            +
            import gradio as gr
         
     | 
| 69 | 
         
            +
             
     | 
| 70 | 
         
            +
            # voice_list = [
         
     | 
| 71 | 
         
            +
            #     'af',  # Default voice is a 50-50 mix of af_bella & af_sarah
         
     | 
| 72 | 
         
            +
            #     'af_bella', 'af_sarah', 'am_adam', 'am_michael',
         
     | 
| 73 | 
         
            +
            #     'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
         
     | 
| 74 | 
         
            +
            # ]
         
     | 
| 75 | 
         
            +
             
     | 
| 76 | 
         
            +
             
     | 
| 77 | 
         
            +
             
     | 
| 78 | 
         
            +
            import os
         
     | 
| 79 | 
         
            +
             
     | 
| 80 | 
         
            +
            # Get the list of voice names without file extensions
         
     | 
| 81 | 
         
            +
            voice_list = [
         
     | 
| 82 | 
         
            +
                os.path.splitext(filename)[0]
         
     | 
| 83 | 
         
            +
                for filename in os.listdir("./KOKORO/voices")
         
     | 
| 84 | 
         
            +
                if filename.endswith('.pt')
         
     | 
| 85 | 
         
            +
            ]
         
     | 
| 86 | 
         
            +
             
     | 
| 87 | 
         
            +
            # Sort the list based on the length of each name
         
     | 
| 88 | 
         
            +
            voice_list = sorted(voice_list, key=len)
         
     | 
| 89 | 
         
            +
             
     | 
| 90 | 
         
            +
            def toggle_autoplay(autoplay):
         
     | 
| 91 | 
         
            +
                return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
         
     | 
| 92 | 
         
            +
             
     | 
| 93 | 
         
            +
            with gr.Blocks() as demo1:
         
     | 
| 94 | 
         
            +
                gr.Markdown("# Batched TTS")
         
     | 
| 95 | 
         
            +
                with gr.Row():
         
     | 
| 96 | 
         
            +
                    with gr.Column():
         
     | 
| 97 | 
         
            +
                        text = gr.Textbox(
         
     | 
| 98 | 
         
            +
                            label='Enter Text',
         
     | 
| 99 | 
         
            +
                            lines=3,
         
     | 
| 100 | 
         
            +
                            placeholder="Type your text here..."
         
     | 
| 101 | 
         
            +
                        )
         
     | 
| 102 | 
         
            +
                        with gr.Row():
         
     | 
| 103 | 
         
            +
                            voice = gr.Dropdown(
         
     | 
| 104 | 
         
            +
                                voice_list, 
         
     | 
| 105 | 
         
            +
                                value='af', 
         
     | 
| 106 | 
         
            +
                                allow_custom_value=False, 
         
     | 
| 107 | 
         
            +
                                label='Voice', 
         
     | 
| 108 | 
         
            +
                                info='Starred voices are more stable'
         
     | 
| 109 | 
         
            +
                            )
         
     | 
| 110 | 
         
            +
                        with gr.Row():
         
     | 
| 111 | 
         
            +
                            generate_btn = gr.Button('Generate', variant='primary')
         
     | 
| 112 | 
         
            +
                        with gr.Accordion('Audio Settings', open=False):
         
     | 
| 113 | 
         
            +
                            model_name=gr.Dropdown(model_list,label="Model",value=model_list[0])
         
     | 
| 114 | 
         
            +
                            remove_silence = gr.Checkbox(value=False, label='✂️ Remove Silence From TTS')
         
     | 
| 115 | 
         
            +
                            minimum_silence = gr.Number(
         
     | 
| 116 | 
         
            +
                                label="Keep Silence Upto (In seconds)", 
         
     | 
| 117 | 
         
            +
                                value=0.05
         
     | 
| 118 | 
         
            +
                            )
         
     | 
| 119 | 
         
            +
                            speed = gr.Slider(
         
     | 
| 120 | 
         
            +
                                minimum=0.25, maximum=2, value=1, step=0.1, 
         
     | 
| 121 | 
         
            +
                                label='⚡️Speed', info='Adjust the speaking speed'
         
     | 
| 122 | 
         
            +
                            )
         
     | 
| 123 | 
         
            +
                            trim = gr.Slider(
         
     | 
| 124 | 
         
            +
                                minimum=0, maximum=1, value=0, step=0.1, 
         
     | 
| 125 | 
         
            +
                                label='🔪 Trim', info='How much to cut from both ends of each segment'
         
     | 
| 126 | 
         
            +
                            )   
         
     | 
| 127 | 
         
            +
                            pad_between = gr.Slider(
         
     | 
| 128 | 
         
            +
                                minimum=0, maximum=2, value=0, step=0.1, 
         
     | 
| 129 | 
         
            +
                                label='🔇 Pad Between', info='Silent Duration between segments [For Large Text]'
         
     | 
| 130 | 
         
            +
                            )
         
     | 
| 131 | 
         
            +
                            
         
     | 
| 132 | 
         
            +
                    with gr.Column():
         
     | 
| 133 | 
         
            +
                        audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
         
     | 
| 134 | 
         
            +
                        with gr.Accordion('Enable Autoplay', open=False):
         
     | 
| 135 | 
         
            +
                            autoplay = gr.Checkbox(value=True, label='Autoplay')
         
     | 
| 136 | 
         
            +
                            autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
         
     | 
| 137 | 
         
            +
             
     | 
| 138 | 
         
            +
                text.submit(
         
     | 
| 139 | 
         
            +
                    text_to_speech, 
         
     | 
| 140 | 
         
            +
                    inputs=[text, model_name,voice, speed, trim, pad_between, remove_silence, minimum_silence], 
         
     | 
| 141 | 
         
            +
                    outputs=[audio]
         
     | 
| 142 | 
         
            +
                )
         
     | 
| 143 | 
         
            +
                generate_btn.click(
         
     | 
| 144 | 
         
            +
                    text_to_speech, 
         
     | 
| 145 | 
         
            +
                    inputs=[text,model_name, voice, speed, trim, pad_between, remove_silence, minimum_silence], 
         
     | 
| 146 | 
         
            +
                    outputs=[audio]
         
     | 
| 147 | 
         
            +
                )
         
     | 
| 148 | 
         
            +
             
     | 
| 149 | 
         
            +
            def podcast_maker(text,remove_silence=False,minimum_silence=50,model_name="kokoro-v0_19.pth"):
         
     | 
| 150 | 
         
            +
                global MODEL,device
         
     | 
| 151 | 
         
            +
                update_model(model_name)
         
     | 
| 152 | 
         
            +
                if not minimum_silence:
         
     | 
| 153 | 
         
            +
                    minimum_silence = 0.05
         
     | 
| 154 | 
         
            +
                keep_silence = int(minimum_silence * 1000)
         
     | 
| 155 | 
         
            +
                podcast_save_at=podcast(MODEL, device,text,remove_silence=remove_silence, minimum_silence=keep_silence)
         
     | 
| 156 | 
         
            +
                return podcast_save_at
         
     | 
| 157 | 
         
            +
                
         
     | 
| 158 | 
         
            +
             
     | 
| 159 | 
         
            +
             
     | 
| 160 | 
         
            +
            dummpy_example="""{af} Hello, I'd like to order a sandwich please.                                                         
         
     | 
| 161 | 
         
            +
            {af_sky} What do you mean you're out of bread?                                                                      
         
     | 
| 162 | 
         
            +
            {af_bella} I really wanted a sandwich though...                                                              
         
     | 
| 163 | 
         
            +
            {af_nicole} You know what, darn you and your little shop!                                                                       
         
     | 
| 164 | 
         
            +
            {bm_george} I'll just go back home and cry now.                                                                           
         
     | 
| 165 | 
         
            +
            {am_adam} Why me?"""
         
     | 
| 166 | 
         
            +
            with gr.Blocks() as demo2:
         
     | 
| 167 | 
         
            +
                gr.Markdown(
         
     | 
| 168 | 
         
            +
                    """
         
     | 
| 169 | 
         
            +
                # Multiple Speech-Type Generation
         
     | 
| 170 | 
         
            +
                This section allows you to generate multiple speech types or multiple people's voices. Enter your text in the format shown below, and the system will generate speech using the appropriate type. If unspecified, the model will use "af" voice.
         
     | 
| 171 | 
         
            +
                Format:
         
     | 
| 172 | 
         
            +
                {voice_name} your text here
         
     | 
| 173 | 
         
            +
                """
         
     | 
| 174 | 
         
            +
                )
         
     | 
| 175 | 
         
            +
                with gr.Row():
         
     | 
| 176 | 
         
            +
                    gr.Markdown(
         
     | 
| 177 | 
         
            +
                        """
         
     | 
| 178 | 
         
            +
                        **Example Input:**                                                                      
         
     | 
| 179 | 
         
            +
                        {af} Hello, I'd like to order a sandwich please.                                                         
         
     | 
| 180 | 
         
            +
                        {af_sky} What do you mean you're out of bread?                                                                      
         
     | 
| 181 | 
         
            +
                        {af_bella} I really wanted a sandwich though...                                                              
         
     | 
| 182 | 
         
            +
                        {af_nicole} You know what, darn you and your little shop!                                                                       
         
     | 
| 183 | 
         
            +
                        {bm_george} I'll just go back home and cry now.                                                                           
         
     | 
| 184 | 
         
            +
                        {am_adam} Why me?!                                                                         
         
     | 
| 185 | 
         
            +
                        """
         
     | 
| 186 | 
         
            +
                    )
         
     | 
| 187 | 
         
            +
                with gr.Row():
         
     | 
| 188 | 
         
            +
                    with gr.Column():
         
     | 
| 189 | 
         
            +
                        text = gr.Textbox(
         
     | 
| 190 | 
         
            +
                            label='Enter Text',
         
     | 
| 191 | 
         
            +
                            lines=7,
         
     | 
| 192 | 
         
            +
                            placeholder=dummpy_example
         
     | 
| 193 | 
         
            +
                        )
         
     | 
| 194 | 
         
            +
                        with gr.Row():
         
     | 
| 195 | 
         
            +
                generate_btn = gr.Button('Generate', variant='primary')
            with gr.Accordion('Audio Settings', open=False):
                remove_silence = gr.Checkbox(value=False, label='✂️ Remove Silence From TTS')
                minimum_silence = gr.Number(
                    label="Keep Silence Upto (In seconds)",
                    value=0.20
                )
        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Enable Autoplay', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])

    text.submit(
        podcast_maker,
        inputs=[text, remove_silence, minimum_silence],
        outputs=[audio]
    )
    generate_btn.click(
        podcast_maker,
        inputs=[text, remove_silence, minimum_silence],
        outputs=[audio]
    )

display_text = "  \n".join(voice_list)

with gr.Blocks() as demo3:
    gr.Markdown(f"# Voice Names \n{display_text}")

import click
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(debug, share):
    demo = gr.TabbedInterface(
        [demo1, demo2, demo3],
        ["Batched TTS", "Multiple Speech-Type Generation", "Available Voice Names"],
        title="Kokoro TTS"
    )

    demo.queue().launch(debug=debug, share=share)
    # Run on the local network instead:
    # laptop_ip = "192.168.0.30"
    # port = 8080
    # demo.queue().launch(debug=debug, share=share, server_name=laptop_ip, server_port=port)

if __name__ == "__main__":
    main()


## For client-side use with gradio_client
# from gradio_client import Client
# import shutil
# import os
# os.makedirs("temp_audio", exist_ok=True)
# client = Client("http://127.0.0.1:7860/")
# result = client.predict(
#     text="Hello!!",
#     model_name="kokoro-v0_19.pth",
#     voice_name="af_bella",
#     speed=1,
#     trim=0,
#     pad_between_segments=0,
#     remove_silence=False,
#     minimum_silence=0.05,
#     api_name="/text_to_speech"
# )

# save_at = f"./temp_audio/{os.path.basename(result)}"
# shutil.move(result, save_at)
# print(f"Saved at {save_at}")
     | 
    	
        download_model.py
    ADDED
    
         @@ -0,0 +1,174 @@ 
from huggingface_hub import list_repo_files, hf_hub_download
import os
import shutil

# Repository ID
repo_id = "hexgrad/Kokoro-82M"

# Set up the cache directory
cache_dir = "./cache"  # Customize this path if needed
os.makedirs(cache_dir, exist_ok=True)

def get_voice_models():
    # Ensure the 'voices' directory exists
    voices_dir = './KOKORO/voices'
    if os.path.exists(voices_dir):
        shutil.rmtree(voices_dir)
    os.makedirs(voices_dir, exist_ok=True)

    # Get the list of all files in the repository
    files = list_repo_files(repo_id)

    # Filter files for the 'voices/' folder
    voice_files = [file.replace("voices/", "") for file in files if file.startswith("voices/")]

    # Get current files in the 'voices' folder
    current_voice = os.listdir(voices_dir)

    # Identify files that need to be downloaded
    download_voice = [file for file in voice_files if file not in current_voice]
    if download_voice:
        print(f"Files to download: {download_voice}")

    # Download each missing file
    for file in download_voice:
        file_path = hf_hub_download(repo_id=repo_id, filename=f"voices/{file}", cache_dir=cache_dir)
        target_path = os.path.join(voices_dir, file)
        shutil.copy(file_path, target_path)
        print(f"Downloaded: {file} to {target_path}")

# Call the function to execute the code
get_voice_models()

# Check and download additional required files with caching
kokoro_file = "kokoro-v0_19.pth"
fp16_file = "fp16/kokoro-v0_19-half.pth"

if kokoro_file not in os.listdir("./KOKORO/"):
    file_path = hf_hub_download(repo_id=repo_id, filename=kokoro_file, cache_dir=cache_dir)
    shutil.copy(file_path, os.path.join("./KOKORO/", kokoro_file))
    print(f"Downloaded: {kokoro_file} to ./KOKORO/")

if "fp16" not in os.listdir("./KOKORO/"):
    os.makedirs("./KOKORO/fp16", exist_ok=True)

if os.path.basename(fp16_file) not in os.listdir("./KOKORO/fp16/"):
    file_path = hf_hub_download(repo_id=repo_id, filename=fp16_file, cache_dir=cache_dir)
    shutil.copy(file_path, os.path.join("./KOKORO/fp16/", os.path.basename(fp16_file)))
    print(f"Downloaded: {os.path.basename(fp16_file)} to ./KOKORO/fp16/")
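A small optional check, not part of download_model.py itself: confirm that the base model, the fp16 model, and at least one voice pack actually landed in the paths used above before launching the app.

import os

assert os.path.isfile("./KOKORO/kokoro-v0_19.pth"), "base model missing"
assert os.path.isfile("./KOKORO/fp16/kokoro-v0_19-half.pth"), "fp16 model missing"
assert any(f.endswith(".pt") for f in os.listdir("./KOKORO/voices")), "no voice packs found"
print("All Kokoro files are in place.")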
             

# For Windows one-click run
import platform

def setup_batch_file():
    # Check if the system is Windows
    if platform.system() == "Windows":
        # Check if 'run_app.bat' exists in the current folder
        if os.path.exists("run_app.bat"):
            print("'run_app.bat' already exists in the current folder.")
        else:
            # Content for run_app.bat
            bat_content_app = '''@echo off
call myenv\\Scripts\\activate
@python.exe app.py %*
@pause
'''
            # Save the content to run_app.bat
            with open('run_app.bat', 'w') as bat_file:
                bat_file.write(bat_content_app)
            print("The 'run_app.bat' file has been created.")
    else:
        print("This system is not Windows. Batch file creation skipped.")

# Run the setup function
setup_batch_file()


import torch
from itertools import combinations

def mix_all_voices(folder_path="./KOKORO/voices"):
    """Mix all pairs of voice models and save the new models."""
    # Get the list of available voice packs
    available_voice_pack = [
        os.path.splitext(filename)[0]
        for filename in os.listdir(folder_path)
        if filename.endswith('.pt')
    ]

    # Generate all unique pairs of voices
    voice_combinations = combinations(available_voice_pack, 2)

    # def mix_model(voice_1, voice_2, weight_1=0.6, weight_2=0.4):
    #     """Mix two voice models with a weighted average and save the new model."""
    #     new_name = f"{voice_1}_mix_{voice_2}"
    #     voice_id_1 = torch.load(f'{folder_path}/{voice_1}.pt', weights_only=True)
    #     voice_id_2 = torch.load(f'{folder_path}/{voice_2}.pt', weights_only=True)
    #     # Create the mixed model using a weighted average
    #     mixed_voice = (weight_1 * voice_id_1) + (weight_2 * voice_id_2)
    #     # Save the mixed model
    #     torch.save(mixed_voice, f'{folder_path}/{new_name}.pt')
    #     print(f"Created new voice model: {new_name}")

    # Function to mix two voices (simple 50/50 average)
    def mix_model(voice_1, voice_2):
        """Mix two voice models and save the new model."""
        new_name = f"{voice_1}_mix_{voice_2}"
        voice_id_1 = torch.load(f'{folder_path}/{voice_1}.pt', weights_only=True)
        voice_id_2 = torch.load(f'{folder_path}/{voice_2}.pt', weights_only=True)

        # Create the mixed model by averaging the weights
        mixed_voice = torch.mean(torch.stack([voice_id_1, voice_id_2]), dim=0)

        # Save the mixed model
        torch.save(mixed_voice, f'{folder_path}/{new_name}.pt')
        print(f"Created new voice model: {new_name}")

    # Create mixed voices for each pair
    for voice_1, voice_2 in voice_combinations:
        print(f"Mixing {voice_1} ❤️ {voice_2}")
        mix_model(voice_1, voice_2)

# Call the function to mix all voices
mix_all_voices("./KOKORO/voices")
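If an uneven blend is wanted for a particular pair instead of the 50/50 average, the commented-out weighted variant above can be applied directly. A sketch with example voice names and example weights (both are placeholders, not project defaults):

import torch

v1 = torch.load("./KOKORO/voices/af_bella.pt", weights_only=True)
v2 = torch.load("./KOKORO/voices/af_sarah.pt", weights_only=True)
mixed = 0.6 * v1 + 0.4 * v2  # weighted average instead of torch.mean
torch.save(mixed, "./KOKORO/voices/af_bella_mix_af_sarah.pt")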

def save_voice_names(directory="./KOKORO/voices", output_file="./voice_names.txt"):
    """
    Retrieves voice names from a directory, sorts them by length, and saves them to a file.

    Parameters:
        directory (str): Directory containing the voice files.
        output_file (str): File to save the sorted voice names.

    Returns:
        None
    """
    # Get the list of voice names without file extensions
    voice_list = [
        os.path.splitext(filename)[0]
        for filename in os.listdir(directory)
        if filename.endswith('.pt')
    ]

    # Sort the list based on the length of each name
    voice_list = sorted(voice_list, key=len)

    # Save the sorted list to the specified file
    with open(output_file, "w") as f:
        for voice_name in voice_list:
            f.write(f"{voice_name}\n")

    print(f"Voice names saved to {output_file}")

save_voice_names()
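The saved list can then be read back wherever a plain list of voice names is handier than rescanning the directory, e.g.:

with open("./voice_names.txt") as f:
    voice_names = [line.strip() for line in f if line.strip()]
print(voice_names)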
    	
        requirements.txt
    ADDED
    
         @@ -0,0 +1,14 @@ 
phonemizer>=3.3.0
scipy>=1.14.1
munch>=4.0.0
transformers>=4.47.1
click>=8.1.8
librosa>=0.10.2
simpleaudio>=1.0.4
gradio>=5.9.1
huggingface-hub>=0.27.0
pydub>=0.25.1
pysrt>=1.1.2
# fastapi>=0.115.6
# uvicorn>=0.34.0
# torch
    	
        srt_dubbing.py
    ADDED
    
         @@ -0,0 +1,557 @@ 
from KOKORO.models import build_model
from KOKORO.utils import tts, tts_file_name, podcast
import sys
sys.path.append('.')
import torch
import gc
print("Loading model...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')
MODEL = build_model('./KOKORO/kokoro-v0_19.pth', device)
print("Model loaded successfully.")

def tts_maker(text, voice_name="af_bella", speed=0.8, trim=0, pad_between=0, save_path="temp.wav", remove_silence=False, minimum_silence=50):
    # Sanitize the save_path to remove any newline characters
    save_path = save_path.replace('\n', '').replace('\r', '')
    global MODEL
    audio_path = tts(MODEL, device, text, voice_name, speed=speed, trim=trim, pad_between_segments=pad_between, output_file=save_path, remove_silence=remove_silence, minimum_silence=minimum_silence)
    return audio_path


model_list = ["kokoro-v0_19.pth", "kokoro-v0_19-half.pth"]
current_model = model_list[0]

def update_model(model_name):
    """
    Updates the TTS model only if the specified model is not already loaded.
    """
    global MODEL, current_model
    if current_model == model_name:
        return f"Model already set to {model_name}"  # No need to reload
    model_path = f"./KOKORO/{model_name}"  # Default model path
    if model_name == "kokoro-v0_19-half.pth":
        model_path = f"./KOKORO/fp16/{model_name}"  # Path for the half-precision model
    # print(f"Loading new model: {model_name}")
    del MODEL  # Clean up the existing model
    gc.collect()
    torch.cuda.empty_cache()  # Ensure GPU memory is cleared
    MODEL = build_model(model_path, device)
    current_model = model_name
    return f"Model updated to {model_name}"


def text_to_speech(text, model_name="kokoro-v0_19.pth", voice_name="af", speed=1.0, trim=1.0, pad_between_segments=0, remove_silence=True, minimum_silence=0.20):
    """
    Converts text to speech with the specified parameters; the model is reloaded only when necessary.
    """
    update_status = update_model(model_name)  # Load the model only if required
    # print(update_status)  # Log model loading status
    if not minimum_silence:
        minimum_silence = 0.05
    keep_silence = int(minimum_silence * 1000)
    save_at = tts_file_name(text)
    audio_path = tts_maker(
        text,
        voice_name,
        speed,
        trim,
        pad_between_segments,
        save_at,
        remove_silence,
        keep_silence
    )
    return audio_path
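A minimal usage sketch, not part of srt_dubbing.py: because update_model() rebuilds the model only when model_name changes, switching between the full and half-precision weights is just two calls.

path_full = text_to_speech("Testing the full-precision model.",
                           model_name="kokoro-v0_19.pth", voice_name="af")
path_half = text_to_speech("Testing the half-precision model.",
                           model_name="kokoro-v0_19-half.pth", voice_name="af")
print(path_full, path_half)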
| 65 | 
         
            +
             
     | 
| 66 | 
         
            +
             
     | 
| 67 | 
         
            +
             
     | 
| 68 | 
         
            +
             
     | 
| 69 | 
         
            +
            import gradio as gr
         
     | 
| 70 | 
         
            +
             
     | 
| 71 | 
         
            +
            # voice_list = [
         
     | 
| 72 | 
         
            +
            #     'af',  # Default voice is a 50-50 mix of af_bella & af_sarah
         
     | 
| 73 | 
         
            +
            #     'af_bella', 'af_sarah', 'am_adam', 'am_michael',
         
     | 
| 74 | 
         
            +
            #     'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
         
     | 
| 75 | 
         
            +
            # ]
         
     | 
| 76 | 
         
            +
             
     | 
| 77 | 
         
            +
             
     | 
| 78 | 
         
            +
             
     | 
| 79 | 
         
            +
            import os
         
     | 
| 80 | 
         
            +
             
     | 
| 81 | 
         
            +
            # Get the list of voice names without file extensions
         
     | 
| 82 | 
         
            +
            voice_list = [
         
     | 
| 83 | 
         
            +
                os.path.splitext(filename)[0]
         
     | 
| 84 | 
         
            +
                for filename in os.listdir("./KOKORO/voices")
         
     | 
| 85 | 
         
            +
                if filename.endswith('.pt')
         
     | 
| 86 | 
         
            +
            ]
         
     | 
| 87 | 
         
            +
             
     | 
| 88 | 
         
            +
            # Sort the list based on the length of each name
         
     | 
| 89 | 
         
            +
            voice_list = sorted(voice_list, key=len)
         
     | 
| 90 | 
         
            +
             
     | 
| 91 | 
         
            +
            def toggle_autoplay(autoplay):
         
     | 
| 92 | 
         
            +
                return gr.Audio(interactive=False, label='Output Audio', autoplay=autoplay)
         
     | 
| 93 | 
         
            +
             
     | 
| 94 | 
         
            +
            with gr.Blocks() as demo1:
         
     | 
| 95 | 
         
            +
                gr.Markdown("# Batched TTS")
         
     | 
| 96 | 
         
            +
                with gr.Row():
         
     | 
| 97 | 
         
            +
                    with gr.Column():
         
     | 
| 98 | 
         
            +
                        text = gr.Textbox(
         
     | 
| 99 | 
         
            +
                            label='Enter Text',
         
     | 
| 100 | 
         
            +
                            lines=3,
         
     | 
| 101 | 
         
            +
                            placeholder="Type your text here..."
         
     | 
| 102 | 
         
            +
                        )
         
     | 
| 103 | 
         
            +
                        with gr.Row():
         
     | 
| 104 | 
         
            +
                            voice = gr.Dropdown(
         
     | 
| 105 | 
         
            +
                                voice_list, 
         
     | 
| 106 | 
         
            +
                                value='af', 
         
     | 
| 107 | 
         
            +
                                allow_custom_value=False, 
         
     | 
| 108 | 
         
            +
                                label='Voice', 
         
     | 
| 109 | 
         
            +
                                info='Starred voices are more stable'
         
     | 
| 110 | 
         
            +
                            )
         
     | 
| 111 | 
         
            +
                        with gr.Row():
         
     | 
| 112 | 
         
            +
                            generate_btn = gr.Button('Generate', variant='primary')
         
     | 
| 113 | 
         
            +
                        with gr.Accordion('Audio Settings', open=False):
         
     | 
| 114 | 
         
            +
                            model_name=gr.Dropdown(model_list,label="Model",value=model_list[0])
         
     | 
| 115 | 
         
            +
                            remove_silence = gr.Checkbox(value=False, label='✂️ Remove Silence From TTS')
         
     | 
| 116 | 
         
            +
                            minimum_silence = gr.Number(
         
     | 
| 117 | 
         
            +
                                label="Keep Silence Upto (In seconds)", 
         
     | 
| 118 | 
         
            +
                                value=0.05
         
     | 
| 119 | 
         
            +
                            )
         
     | 
| 120 | 
         
            +
                            speed = gr.Slider(
         
     | 
| 121 | 
         
            +
                                minimum=0.25, maximum=2, value=1, step=0.1, 
         
     | 
| 122 | 
         
            +
                                label='⚡️Speed', info='Adjust the speaking speed'
         
     | 
| 123 | 
         
            +
                            )
         
     | 
| 124 | 
         
            +
                            trim = gr.Slider(
         
     | 
| 125 | 
         
            +
                                minimum=0, maximum=1, value=0, step=0.1, 
         
     | 
| 126 | 
         
            +
                                label='🔪 Trim', info='How much to cut from both ends of each segment'
         
     | 
| 127 | 
         
            +
                            )   
         
     | 
| 128 | 
         
            +
                            pad_between = gr.Slider(
         
     | 
| 129 | 
         
            +
                                minimum=0, maximum=2, value=0, step=0.1, 
         
     | 
| 130 | 
         
            +
                                label='🔇 Pad Between', info='Silent Duration between segments [For Large Text]'
         
     | 
| 131 | 
         
            +
                            )
         
     | 
| 132 | 
         
            +
                            
         
     | 
| 133 | 
         
            +
                    with gr.Column():
         
     | 
| 134 | 
         
            +
                        audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
         
     | 
| 135 | 
         
            +
                        with gr.Accordion('Enable Autoplay', open=False):
         
     | 
| 136 | 
         
            +
                            autoplay = gr.Checkbox(value=True, label='Autoplay')
         
     | 
| 137 | 
         
            +
                            autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
         
     | 
| 138 | 
         
            +
             
     | 
| 139 | 
         
            +
                text.submit(
         
     | 
| 140 | 
         
            +
                    text_to_speech, 
         
     | 
| 141 | 
         
            +
                    inputs=[text, model_name,voice, speed, trim, pad_between, remove_silence, minimum_silence], 
         
     | 
| 142 | 
         
            +
                    outputs=[audio]
         
     | 
| 143 | 
         
            +
                )
         
     | 
| 144 | 
         
            +
                generate_btn.click(
         
     | 
| 145 | 
         
            +
                    text_to_speech, 
         
     | 
| 146 | 
         
            +
                    inputs=[text,model_name, voice, speed, trim, pad_between, remove_silence, minimum_silence], 
         
     | 
| 147 | 
         
            +
                    outputs=[audio]
         
     | 
| 148 | 
         
            +
                )
         
     | 
| 149 | 
         
            +
             
     | 
| 150 | 
         
            +
            def podcast_maker(text,remove_silence=False,minimum_silence=50,model_name="kokoro-v0_19.pth"):
         
     | 
| 151 | 
         
            +
                global MODEL,device
         
     | 
| 152 | 
         
            +
                update_model(model_name)
         
     | 
| 153 | 
         
            +
                if not minimum_silence:
         
     | 
| 154 | 
         
            +
                    minimum_silence = 0.05
         
     | 
| 155 | 
         
            +
                keep_silence = int(minimum_silence * 1000)
         
     | 
| 156 | 
         
            +
                podcast_save_at=podcast(MODEL, device,text,remove_silence=remove_silence, minimum_silence=keep_silence)
         
     | 
| 157 | 
         
            +
                return podcast_save_at
         
     | 
| 158 | 
         
            +
                
         
     | 
| 159 | 
         
            +
             
     | 
| 160 | 
         
            +
             
     | 
| 161 | 
         
            +
            dummpy_example="""{af} Hello, I'd like to order a sandwich please.                                                         
         
     | 
| 162 | 
         
            +
            {af_sky} What do you mean you're out of bread?                                                                      
         
     | 
| 163 | 
         
            +
            {af_bella} I really wanted a sandwich though...                                                              
         
     | 
| 164 | 
         
            +
            {af_nicole} You know what, darn you and your little shop!                                                                       
         
     | 
| 165 | 
         
            +
            {bm_george} I'll just go back home and cry now.                                                                           
         
     | 
| 166 | 
         
            +
            {am_adam} Why me?"""
         
     | 
| 167 | 
         
            +
            with gr.Blocks() as demo2:
         
     | 
| 168 | 
         
            +
                gr.Markdown(
         
     | 
| 169 | 
         
            +
                    """
         
     | 
| 170 | 
         
            +
                # Multiple Speech-Type Generation
         
     | 
| 171 | 
         
            +
                This section allows you to generate multiple speech types or multiple people's voices. Enter your text in the format shown below, and the system will generate speech using the appropriate type. If unspecified, the model will use "af" voice.
         
     | 
| 172 | 
         
            +
                Format:
         
     | 
| 173 | 
         
            +
                {voice_name} your text here
         
     | 
| 174 | 
         
            +
                """
         
     | 
| 175 | 
         
            +
                )
         
     | 
| 176 | 
         
            +
                with gr.Row():
         
     | 
| 177 | 
         
            +
                    gr.Markdown(
         
     | 
| 178 | 
         
            +
                        """
         
     | 
| 179 | 
         
            +
                        **Example Input:**                                                                      
         
     | 
| 180 | 
         
            +
                        {af} Hello, I'd like to order a sandwich please.                                                         
         
     | 
| 181 | 
         
            +
                        {af_sky} What do you mean you're out of bread?                                                                      
         
     | 
| 182 | 
         
            +
                        {af_bella} I really wanted a sandwich though...                                                              
         
     | 
| 183 | 
         
            +
                        {af_nicole} You know what, darn you and your little shop!                                                                       
         
     | 
| 184 | 
         
            +
                        {bm_george} I'll just go back home and cry now.                                                                           
         
     | 
| 185 | 
         
            +
                        {am_adam} Why me?!                                                                         
         
     | 
| 186 | 
         
            +
                        """
         
     | 
| 187 | 
         
            +
                    )
         
     | 
| 188 | 
         
            +
                with gr.Row():
         
     | 
| 189 | 
         
            +
                    with gr.Column():
         
     | 
| 190 | 
         
            +
                        text = gr.Textbox(
         
     | 
| 191 | 
         
            +
                            label='Enter Text',
         
     | 
| 192 | 
         
            +
                            lines=7,
         
     | 
| 193 | 
         
            +
                            placeholder=dummpy_example
         
     | 
| 194 | 
         
            +
                        )
         
     | 
| 195 | 
         
            +
                        with gr.Row():
         
     | 
| 196 | 
         
            +
                            generate_btn = gr.Button('Generate', variant='primary')
         
     | 
| 197 | 
         
            +
                        with gr.Accordion('Audio Settings', open=False):
         
     | 
| 198 | 
         
            +
                            remove_silence = gr.Checkbox(value=False, label='✂️ Remove Silence From TTS')
         
     | 
| 199 | 
         
            +
                            minimum_silence = gr.Number(
         
     | 
| 200 | 
         
            +
                                label="Keep Silence Upto (In seconds)", 
         
     | 
| 201 | 
         
            +
                                value=0.20
         
     | 
| 202 | 
         
            +
                            )
         
     | 
| 203 | 
         
            +
                    with gr.Column():
         
     | 
| 204 | 
         
            +
                        audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
         
     | 
| 205 | 
         
            +
                        with gr.Accordion('Enable Autoplay', open=False):
         
     | 
| 206 | 
         
            +
                            autoplay = gr.Checkbox(value=True, label='Autoplay')
         
     | 
| 207 | 
         
            +
                            autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])
         
     | 
| 208 | 
         
            +
             
     | 
| 209 | 
         
            +
                text.submit(
         
     | 
| 210 | 
         
            +
                    podcast_maker, 
         
     | 
| 211 | 
         
            +
                    inputs=[text, remove_silence, minimum_silence], 
         
     | 
| 212 | 
         
            +
                    outputs=[audio]
         
     | 
| 213 | 
         
            +
                )
         
     | 
| 214 | 
         
            +
                generate_btn.click(
         
     | 
| 215 | 
         
            +
                    podcast_maker, 
         
     | 
| 216 | 
         
            +
                    inputs=[text, remove_silence, minimum_silence], 
         
     | 
| 217 | 
         
            +
                    outputs=[audio]
         
     | 
| 218 | 
         
            +
                )
         
     | 
| 219 | 
         
            +
             
     | 
| 220 | 
         
            +
             
     | 
| 221 | 
         
            +
             
     | 
| 222 | 
         
            +
             
     | 
| 223 | 
         
            +


import shutil
import os

# Ensure the output directory exists
output_dir = "./temp_audio"
os.makedirs(output_dir, exist_ok=True)


#@title Generate Audio File From Subtitle
# from tqdm.notebook import tqdm
from tqdm import tqdm
import subprocess
import json
import pysrt
import os
from pydub import AudioSegment
import shutil
import uuid
import re
import time

# os.chdir(install_path)

def your_tts(text, audio_path, actual_duration, speed=1.0):
    global srt_voice_name
    model_name = "kokoro-v0_19.pth"
    tts_path = text_to_speech(text, model_name, voice_name=srt_voice_name, speed=speed)
    print(tts_path)
    tts_audio = AudioSegment.from_file(tts_path)
    tts_duration = len(tts_audio)
    # If the clip overruns a non-zero subtitle slot, regenerate it at a higher speed.
    # The actual_duration > 0 guard avoids a ZeroDivisionError for zero-length entries.
    if actual_duration > 0 and tts_duration > actual_duration:
        speedup_factor = tts_duration / actual_duration
        tts_path = text_to_speech(text, model_name, voice_name=srt_voice_name, speed=speedup_factor)
    print(tts_path)
    shutil.copy(tts_path, audio_path)


base_path = "."
import datetime

def get_current_time():
    # Return current time as a string in the format HH_MM_AM/PM
    return datetime.datetime.now().strftime("%I_%M_%p")

def get_subtitle_Dub_path(srt_file_path, Language="en"):
    file_name = os.path.splitext(os.path.basename(srt_file_path))[0]
    if not os.path.exists(f"{base_path}/TTS_DUB"):
        os.mkdir(f"{base_path}/TTS_DUB")
    random_string = str(uuid.uuid4())[:6]
    new_path = f"{base_path}/TTS_DUB/{file_name}_{Language}_{get_current_time()}_{random_string}.wav"
    return new_path
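
# Illustration (hypothetical values): get_subtitle_Dub_path("./long.srt") returns something like
# "./TTS_DUB/long_en_07_45_PM_3fa2b1.wav"; the timestamp and random suffix change on every call.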

def clean_srt(input_path):
    file_name = os.path.basename(input_path)
    output_folder = f"{base_path}/save_srt"
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    output_path = f"{output_folder}/{file_name}"

    def clean_srt_line(text):
        bad_list = ["[", "]", "♫", "\n"]
        for i in bad_list:
            text = text.replace(i, "")
        return text.strip()

    # Load the subtitle file
    subs = pysrt.open(input_path)

    # Write each subtitle back out as a clean four-line block
    with open(output_path, "w", encoding='utf-8') as file:
        for sub in subs:
            file.write(f"{sub.index}\n")
            file.write(f"{sub.start} --> {sub.end}\n")
            file.write(f"{clean_srt_line(sub.text)}\n")
            file.write("\n")
    # print(f"Clean SRT saved at: {output_path}")
    return output_path
# Example usage
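# (hypothetical path, shown for illustration only)
# cleaned_srt_path = clean_srt("./long.srt")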


class SRTDubbing:
    def __init__(self):
        pass

    @staticmethod
    def text_to_speech_srt(text, audio_path, language, actual_duration):
        tts_filename = "./cache/temp.wav"
        your_tts(text, tts_filename, actual_duration, speed=1.0)
        # Check the duration of the generated TTS audio
        tts_audio = AudioSegment.from_file(tts_filename)
        tts_duration = len(tts_audio)

        if actual_duration == 0:
            # If actual duration is zero, use the original TTS audio without modifications
            shutil.move(tts_filename, audio_path)
            return
        # If TTS audio duration is longer than actual duration, speed up the audio
        if tts_duration > actual_duration:
            speedup_factor = tts_duration / actual_duration
            speedup_filename = "./cache/speedup_temp.wav"
            # Use ffmpeg to change audio speed
            # (note: older ffmpeg builds cap the atempo filter at 2.0, so extreme
            # overruns may need chained atempo filters)
            subprocess.run([
                "ffmpeg",
                "-i", tts_filename,
                "-filter:a", f"atempo={speedup_factor}",
                speedup_filename,
                "-y"
            ], check=True)

            # Replace the original TTS audio with the sped-up version
            shutil.move(speedup_filename, audio_path)
        elif tts_duration < actual_duration:
            # If TTS audio duration is less than actual duration, add silence to match the duration
            silence_gap = actual_duration - tts_duration
            silence = AudioSegment.silent(duration=int(silence_gap))
            new_audio = tts_audio + silence

            # Save the new audio with added silence
            new_audio.export(audio_path, format="wav")
        else:
            # If TTS audio duration is equal to actual duration, use the original TTS audio
            shutil.move(tts_filename, audio_path)

    @staticmethod
    def make_silence(pause_time, pause_save_path):
        silence = AudioSegment.silent(duration=pause_time)
        silence.export(pause_save_path, format="wav")
        return pause_save_path

    @staticmethod
    def create_folder_for_srt(srt_file_path):
        srt_base_name = os.path.splitext(os.path.basename(srt_file_path))[0]
        random_uuid = str(uuid.uuid4())[:4]
        dummy_folder_path = f"{base_path}/dummy"
        if not os.path.exists(dummy_folder_path):
            os.makedirs(dummy_folder_path)
        folder_path = os.path.join(dummy_folder_path, f"{srt_base_name}_{random_uuid}")
        os.makedirs(folder_path, exist_ok=True)
        return folder_path

    @staticmethod
    def concatenate_audio_files(audio_paths, output_path):
        concatenated_audio = AudioSegment.silent(duration=0)
        for audio_path in audio_paths:
            audio_segment = AudioSegment.from_file(audio_path)
            concatenated_audio += audio_segment
        concatenated_audio.export(output_path, format="wav")

    def srt_to_dub(self, srt_file_path, dub_save_path, language='en'):
        result = self.read_srt_file(srt_file_path)
        new_folder_path = self.create_folder_for_srt(srt_file_path)
        join_path = []
        for i in tqdm(result):
        # for i in result:
            text = i['text']
            actual_duration = i['end_time'] - i['start_time']
            pause_time = i['pause_time']
            silence_path = f"{new_folder_path}/{i['previous_pause']}"
            self.make_silence(pause_time, silence_path)
            join_path.append(silence_path)
            tts_path = f"{new_folder_path}/{i['audio_name']}"
            self.text_to_speech_srt(text, tts_path, language, actual_duration)
            join_path.append(tts_path)
        self.concatenate_audio_files(join_path, dub_save_path)

    @staticmethod
    def convert_to_millisecond(time_str):
        if isinstance(time_str, str):
            hours, minutes, second_millisecond = time_str.split(':')
            seconds, milliseconds = second_millisecond.split(",")

            total_milliseconds = (
                int(hours) * 3600000 +
                int(minutes) * 60000 +
                int(seconds) * 1000 +
                int(milliseconds)
            )

            return total_milliseconds

    @staticmethod
    def read_srt_file(file_path):
        entries = []
        default_start = 0
        previous_end_time = default_start
        entry_number = 1
        audio_name_template = "{}.wav"
        previous_pause_template = "{}_before_pause.wav"

        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
            # print(lines)
            for i in range(0, len(lines), 4):
                time_info = re.findall(r'(\d+:\d+:\d+,\d+) --> (\d+:\d+:\d+,\d+)', lines[i + 1])
                start_time = SRTDubbing.convert_to_millisecond(time_info[0][0])
                end_time = SRTDubbing.convert_to_millisecond(time_info[0][1])

                current_entry = {
                    'entry_number': entry_number,
                    'start_time': start_time,
                    'end_time': end_time,
                    'text': lines[i + 2].strip(),
                    'pause_time': start_time - previous_end_time if entry_number != 1 else start_time - default_start,
                    'audio_name': audio_name_template.format(entry_number),
                    'previous_pause': previous_pause_template.format(entry_number),
                }

                entries.append(current_entry)
                previous_end_time = end_time
                entry_number += 1

        with open("entries.json", "w") as file:
            json.dump(entries, file, indent=4)
        return entries
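
# Worked example of the timing math above (values are illustrative):
#   SRTDubbing.convert_to_millisecond("00:01:02,500")
#   -> 0*3600000 + 1*60000 + 2*1000 + 500 = 62500 ms
# read_srt_file expects the four-line blocks written by clean_srt():
#   index line, "start --> end" line, single text line, blank line.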

srt_voice_name = "am_adam"
def srt_process(srt_file_path, voice_name, dest_language="en"):
    global srt_voice_name
    srt_voice_name = voice_name
    srt_dubbing = SRTDubbing()
    dub_save_path = get_subtitle_Dub_path(srt_file_path, dest_language)
    srt_dubbing.srt_to_dub(srt_file_path, dub_save_path, dest_language)
    return dub_save_path

# Example usage:
# srt_file_path="./long.srt"
# dub_audio_path=srt_process(srt_file_path, "am_adam")
# print(f"Audio file saved at: {dub_audio_path}")


with gr.Blocks() as demo3:

    gr.Markdown(
        """
        # Generate Audio File From Subtitle [Single Speaker Only]

        To generate subtitles, you can use [Whisper Turbo Subtitle](https://github.com/NeuralFalconYT/Whisper-Turbo-Subtitle):

        [Open In Colab](https://colab.research.google.com/github/NeuralFalconYT/Whisper-Turbo-Subtitle/blob/main/Whisper_Turbo_Subtitle.ipynb)
        """
    )
    with gr.Row():
        with gr.Column():
            srt_file = gr.File(label='Upload .srt Subtitle File Only')
            with gr.Row():
                voice = gr.Dropdown(
                    voice_list,
                    value='af',
                    allow_custom_value=False,
                    label='Voice',
                )
            with gr.Row():
                generate_btn_ = gr.Button('Generate', variant='primary')

        with gr.Column():
            audio = gr.Audio(interactive=False, label='Output Audio', autoplay=True)
            with gr.Accordion('Enable Autoplay', open=False):
                autoplay = gr.Checkbox(value=True, label='Autoplay')
                autoplay.change(toggle_autoplay, inputs=[autoplay], outputs=[audio])

    # srt_file.submit(
    #     srt_process,
    #     inputs=[srt_file, voice],
    #     outputs=[audio]
    # )
    generate_btn_.click(
        srt_process,
        inputs=[srt_file, voice],
        outputs=[audio]
    )


display_text = "  \n".join(voice_list)

with gr.Blocks() as demo4:
    gr.Markdown(f"# Voice Names \n{display_text}")


import click

@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(debug, share):
    demo = gr.TabbedInterface(
        [demo1, demo2, demo3, demo4],
        ["Batched TTS", "Multiple Speech-Type Generation", "SRT Dubbing", "Available Voice Names"],
        title="Kokoro TTS"
    )

    demo.queue().launch(debug=debug, share=share)
    # Run on local network:
    # laptop_ip="192.168.0.30"
    # port=8080
    # demo.queue().launch(debug=debug, share=share, server_name=laptop_ip, server_port=port)

if __name__ == "__main__":
    main()


## For client side
# from gradio_client import Client
# import shutil
# import os
# os.makedirs("temp_audio", exist_ok=True)
# client = Client("http://127.0.0.1:7860/")
# result = client.predict(
#     text="Hello!!",
#     model_name="kokoro-v0_19.pth",
#     voice_name="af_bella",
#     speed=1,
#     trim=0,
#     pad_between_segments=0,
#     remove_silence=False,
#     minimum_silence=0.05,
#     api_name="/text_to_speech"
# )

# save_at=f"./temp_audio/{os.path.basename(result)}"
# shutil.move(result, save_at)
# print(f"Saved at {save_at}")