# File size: 6,092 Bytes

import argparse
import os
import random
import re
import string
import subprocess
import tempfile
from typing import Optional

from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

def sanitize_filename(filename):
    """Return *filename* restricted to characters that are safe in a file name.

    Alphanumeric characters, spaces, underscores and hyphens are kept; every
    other character is dropped. Trailing whitespace is stripped from the result.
    """
    allowed_extras = (' ', '_', '-')
    kept = []
    for ch in filename:
        if ch.isalnum() or ch in allowed_extras:
            kept.append(ch)
    return ''.join(kept).rstrip()

# function to add original punctuation to cotovía numbers extension (option p)
def punctuate_p(str_ext):
    """Tidy the punctuation of a line expanded by Cotovía (option ``-p``).

    A fixed sequence of regular-expression substitutions is applied in order;
    later rules operate on the output of earlier ones, so the order matters.

    Args:
        str_ext: one line of Cotovía output.

    Returns:
        The line with spacing around punctuation normalised, opening ``¿``/``¡``
        removed, and dash- or parenthesis-delimited asides rewritten with commas.
    """
    rules = (
        # ' ·' becomes an ellipsis
        (r" ·", r"..."),
        # no whitespace before , . ! ? ; : ) ]
        (r"\s+([.,!?;:)\]])", r"\1"),
        # no whitespace after ( [ ¡ ¿
        (r"([\(\[¡¿])\s+", r"\1"),
        # trim whitespace just inside double quotation marks
        (r'"\s*([^"]*?)\s*"', r'"\1"'),
        # '- text -' becomes '-text-'
        (r"-\s*([^-]*?)\s*-", r"-\1-"),
        # drop opening question / exclamation marks
        (r"[¿¡]", r""),
        # collapse runs of whitespace into a single space
        (r"\s+", r" "),
        # a dash between two numbers becomes a space
        (r"(\d+)\s*-\s*(\d+)", r"\1 \2"),
        # 'word -text- ' becomes 'word, text, '
        (r"(\w+)\s+-([^-]*?)-\s+([^-]*?)", r"\1, \2, "),
        # 'word - ' becomes 'word, '
        (r"(\w+[!\?]?)\s+-\s*", r"\1, "),
        # 'word ( text )' becomes 'word, text,'
        (r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \2,"),
    )

    result = str_ext
    for pattern, replacement in rules:
        result = re.sub(pattern, replacement, result)
    return result


def to_cotovia(text):
    """Normalise text with the external Cotovía preprocessor.

    The text goes through three stages:

    1. ``M€``, ``€`` and ``ºC`` following a word or number are spelled out.
    2. ``sed`` removes or replaces a fixed list of characters (quotation
       marks, dash variants, some letters with diacritics).
    3. The result is converted to ISO-8859-1 with ``iconv``, processed with
       ``cotovia -i <file> -p``, converted back to UTF-8, and every output
       line is cleaned with ``punctuate_p``.

    All intermediate files live in a private temporary directory that is
    removed when the function returns or raises, so concurrent runs cannot
    collide and nothing is left behind in the current working directory.

    Args:
        text: the text to normalise.

    Returns:
        A list with one normalised string per line of Cotovía output, or an
        empty list if that output could not be read.

    Raises:
        FileNotFoundError: propagated from ``subprocess.run`` when ``sed``,
            ``iconv`` or ``cotovia`` is not installed.
    """
    ## Initial text preprocessing
    # ' M€' / 'somewordM€' -> 'someword millóns de euros'
    text = re.sub(r"(\w+)\s*M€", r"\1 millóns de euros", text)

    # ' €' / 'someword€' -> 'someword euros'
    text = re.sub(r"(\w+)\s*€", r"\1 euros", text)

    # ' ºC' / 'somewordºC' -> 'someword graos centígrados'
    text = re.sub(r"(\w+)\s*ºC", r"\1 graos centígrados", text)

    # NOTE(review): a few expressions below look empty or like no-ops
    # ("s/ //g", "s///g", "s/-/-/g"); they presumably contain non-printing or
    # look-alike characters -- verify against the original file before editing
    # this list. Keep the order: the apostrophes introduced when replacing ”
    # are removed by the rule that follows it.
    sed_cmd = ["sed", "-e", "s/₂//g", "-e", "s/⸺//g", "-e", "s/ //g", "-e", "s///g", "-e", "s/č/c/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g",
                "-e", "s/ş/s/g", "-e", "s/Ž/Z/g", "-e", "s/ž/z/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g", "-e", "s/ş/s/g", "-e", "s/«//g", "-e", "s/»//g",
                "-e", "s/<<//g", "-e", "s/>>//g", "-e", "s/“/\"/g", "-e", "s/”/'\"'/g", "-e", "s/\'//g", "-e", "s/‘//g", "-e", "s/’//g", "-e", "s/…//g",
                "-e", "s/-/-/g", "-e", "s/–/-/g", "-e", "s/—/-/g", "-e", "s/―/-/g", "-e", "s/−/-/g", "-e", "s/‒/-/g", "-e", "s/─/-/g"]
    # Exchange UTF-8 with sed regardless of the locale's preferred encoding,
    # matching the explicit utf-8 <-> iso8859-1 conversions done below.
    text = subprocess.run(sed_cmd, input=text, text=True, encoding="utf-8",
                          capture_output=True).stdout

    # File names inside the scratch directory. The code relies on cotovia
    # writing "<input stem>.pre" for an input called "<input stem>.txt".
    txt_name = 'input.txt'
    iso_txt_name = 'iso8859-1input.txt'
    iso_pre_name = 'iso8859-1input.pre'
    utf8_pre_name = 'utf8input.pre'

    segs = []
    with tempfile.TemporaryDirectory(prefix="cotovia_") as workdir:
        # The file is fed to "iconv -f utf-8", so it must be written as UTF-8.
        with open(os.path.join(workdir, txt_name), 'w', encoding='utf-8') as f:
            f.write(text + '\n')

        # Run the tools from inside the scratch directory with relative file
        # names, so every file they create ends up there.
        run_opts = {
            "stdout": subprocess.DEVNULL,
            "stderr": subprocess.STDOUT,
            "cwd": workdir,
        }
        # utf-8 to iso8859-1
        subprocess.run(["iconv", "-f", "utf-8", "-t", "iso8859-1", txt_name, "-o", iso_txt_name], **run_opts)
        subprocess.run(["cotovia", "-i", iso_txt_name, "-p"], **run_opts)
        # iso8859-1 back to utf-8
        subprocess.run(["iconv", "-f", "iso8859-1", "-t", "utf-8", iso_pre_name, "-o", utf8_pre_name], **run_opts)

        try:
            with open(os.path.join(workdir, utf8_pre_name), 'r', encoding='utf-8') as f:
                segs = [punctuate_p(line.rstrip()) for line in f]
        except (OSError, UnicodeError):
            # Best effort: a missing or undecodable output file (e.g. because
            # one of the tools above failed) yields an empty result.
            print("ERROR: Couldn't read cotovia output")

    return segs

def text_preprocess(text):
    """Normalise *text* with Cotovía and return it as a single string.

    Calls ``to_cotovia``, joins the returned segments with single spaces and
    appends a final ``.`` when the last character is not ASCII punctuation
    (``string.punctuation``).

    Args:
        text: the raw input text.

    Returns:
        The normalised text. An empty string is returned unchanged when
        ``to_cotovia`` produced no segments.
    """
    segments = to_cotovia(text)

    # convert list to string
    result = ' '.join(segments)

    # add final punctuation if missing; the emptiness check avoids an
    # IndexError when to_cotovia returned [] (its failure path)
    if result and result[-1] not in string.punctuation:
        result += '.'

    return result


def main():
    """Command-line entry point: normalise a text and synthesise it to a WAV file.

    Parses three positional arguments (the text, the model checkpoint path and
    the model config path), normalises the text with ``text_preprocess``,
    synthesises it with a ``Synthesizer`` and saves the audio as
    ``<first word>.wav`` (relative path), where the first word is taken from
    the raw input and sanitised. ``audio.wav`` is used when no usable first
    word is available.
    """
    parser = argparse.ArgumentParser(description='Cotovía text normalisation')
    parser.add_argument('text', type=str, help='Text to synthetize')
    parser.add_argument('model_path', type=str, help='Absolute path to the model checkpoint.pth')
    parser.add_argument('config_path', type=str, help='Absolute path to the model config.json')

    args = parser.parse_args()

    print("Text before preprocessing: ", args.text)
    text = text_preprocess(args.text)
    print("Text after preprocessing: ", text)

    # Only the checkpoint and config are supplied; the remaining four
    # positional arguments are passed as None.
    synthesizer = Synthesizer(
        args.model_path, args.config_path, None, None, None, None,
    )

    # Name the output after the first word of the raw input. Fall back to
    # "audio" both when the input has no words and when sanitising leaves
    # nothing (e.g. the first word is only punctuation), which would otherwise
    # produce the file name ".wav".
    words = args.text.split()
    first_word = sanitize_filename(words[0]) if words else ""
    if not first_word:
        first_word = "audio"

    # Synthesise the normalised text and save it with the synthesizer's own writer.
    wavs = synthesizer.tts(text)
    filename = f"{first_word}.wav"
    synthesizer.save_wav(wavs, filename)

    print(f"Audio file saved as: {filename}")

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()