cmagui's picture
Rename preprocess.py to synthesize.py
f23b7ed verified
raw
history blame
5.9 kB
import argparse
import tempfile
import random
import re
import string
import subprocess
from typing import Optional
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
def sanitize_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    Only alphanumeric characters, spaces, underscores and hyphens are kept;
    everything else is dropped. Trailing whitespace is stripped from the
    result (leading whitespace is left as is).
    """
    allowed_extras = (' ', '_', '-')
    kept = [ch for ch in filename if ch.isalnum() or ch in allowed_extras]
    return ''.join(kept).rstrip()
# Restore the original punctuation in Cotovía's number-expansion output (option p)
def punctuate_p(str_ext):
    """Tidy the punctuation and spacing of one line of expanded text.

    The rewrite rules below are applied strictly in order; several of them
    rely on the effect of earlier ones, so the order must not be changed.

    Args:
        str_ext: A line of text (presumably Cotovía's ``-p`` output, with
            punctuation marks separated by spaces -- confirm with the tool).

    Returns:
        The line with punctuation re-attached, dashes and parentheses turned
        into commas, opening ``¿``/``¡`` removed and whitespace collapsed.
    """
    rewrite_rules = (
        # ' ·' becomes an ellipsis
        (r" ·", r"..."),
        # drop whitespace before , . ! ? ; : ) ]
        (r"\s+([.,!?;:)\]])", r"\1"),
        # drop whitespace after ( [ ¡ ¿
        (r"([\(\[¡¿])\s+", r"\1"),
        # trim whitespace just inside a pair of double quotes
        (r'"\s*([^"]*?)\s*"', r'"\1"'),
        # '- text -' becomes '-text-'
        (r"-\s*([^-]*?)\s*-", r"-\1-"),
        # remove opening question / exclamation marks
        (r"[¿¡]", r""),
        # collapse any run of whitespace into a single space
        (r"\s+", r" "),
        # a hyphen between two numbers becomes a space
        (r"(\d+)\s*-\s*(\d+)", r"\1 \2"),
        ### dashes and parentheses are rendered as commas
        # 'word -text- ' becomes 'word, text, '
        (r"(\w+)\s+-([^-]*?)-\s+([^-]*?)", r"\1, \2, "),
        # 'word - ' becomes 'word, '
        (r"(\w+[!\?]?)\s+-\s*", r"\1, "),
        # 'word ( text )' becomes 'word, text,'
        (r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \2,"),
    )
    for pattern, replacement in rewrite_rules:
        str_ext = re.sub(pattern, replacement, str_ext)
    return str_ext
def to_cotovia(text):
    """Normalise *text* with the external ``cotovia`` tool.

    Pipeline: expand a few currency/temperature symbols, strip or replace
    problematic characters with ``sed``, write the text to a scratch file,
    convert it to ISO-8859-1 with ``iconv``, run ``cotovia -p`` on it, convert
    the ``.pre`` output back to UTF-8 and post-process every line with
    ``punctuate_p``.

    Requires the ``sed``, ``iconv``, ``cotovia`` and ``rm`` executables on
    ``PATH``. Scratch files are created in the current working directory and
    are removed again even when a step raises.

    Args:
        text: UTF-8 text to normalise.

    Returns:
        A list with one processed string per line of Cotovía output, or an
        empty list when that output could not be read (an error message is
        printed in that case).

    Raises:
        FileNotFoundError: If ``sed``, ``iconv`` or ``cotovia`` is missing.
    """
    ## Initial text preprocessing
    # ' M€' -> ' millóns de euros' (also when glued to the preceding word)
    text = re.sub(r"(\w+)\s*M€", r"\1 millóns de euros", text)
    # ' €' -> ' euros'
    text = re.sub(r"(\w+)\s*€", r"\1 euros", text)
    # ' ºC' -> ' graos centígrados'
    text = re.sub(r"(\w+)\s*ºC", r"\1 graos centígrados", text)

    # Random suffix so that concurrent runs do not share scratch files
    res = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

    # Character clean-up before handing the text to Cotovía.
    # NOTE(review): the expressions "s/ //g", "s///g" and "s/-/-/g" look as if
    # they originally targeted a non-ASCII or invisible character (e.g. a
    # non-breaking space or a Unicode hyphen) that was lost when the file was
    # copied -- confirm against the original source before relying on them.
    text = subprocess.run(["sed", "-e", "s/₂//g", "-e", "s/⸺//g", "-e", "s/ //g", "-e", "s///g", "-e", "s/č/c/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g",
                           "-e", "s/ş/s/g", "-e", "s/Ž/Z/g", "-e", "s/ž/z/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g", "-e", "s/ş/s/g", "-e", "s/«//g", "-e", "s/»//g",
                           "-e", "s/<<//g", "-e", "s/>>//g", "-e", "s/“/\"/g", "-e", "s/”/'\"'/g", "-e", "s/\'//g", "-e", "s/‘//g", "-e", "s/’//g", "-e", "s/…//g",
                           "-e", "s/-/-/g", "-e", "s/–/-/g", "-e", "s/—/-/g", "-e", "s/―/-/g", "-e", "s/−/-/g", "-e", "s/‒/-/g", "-e", "s/─/-/g"],
                          input=text, text=True, encoding="utf-8", capture_output=True).stdout

    # Input and output Cotovía files
    in_txt_path = res + '.txt'
    in_txt_path_iso = 'iso8859-1' + res + '.txt'
    # The code expects cotovia to write its result next to the ISO input,
    # with the extension replaced by '.pre'.
    out_pre_path = 'iso8859-1' + res + '.pre'
    out_pre_path_utf8 = 'utf8' + res + '.pre'

    segs = []
    try:
        # Explicit encoding: iconv below is told the file is UTF-8, so it must
        # not depend on the locale's default encoding.
        with open(in_txt_path, 'w', encoding='utf-8') as f:
            f.write(text + '\n')

        # utf-8 to iso8859-1
        subprocess.run(["iconv", "-f", "utf-8", "-t", "iso8859-1", in_txt_path, "-o", in_txt_path_iso], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
        subprocess.run(["cotovia", "-i", in_txt_path_iso, "-p"], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
        # iso8859-1 back to utf-8
        subprocess.run(["iconv", "-f", "iso8859-1", "-t", "utf-8", out_pre_path, "-o", out_pre_path_utf8], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)

        try:
            with open(out_pre_path_utf8, 'r', encoding='utf-8') as f:
                segs = [line.rstrip() for line in f]
        except (OSError, UnicodeDecodeError):
            # Best effort: a failed external step leaves no readable output.
            print("ERROR: Couldn't read cotovia output")
        segs = [punctuate_p(line) for line in segs]
    finally:
        # Always remove the scratch files, including on errors above.
        subprocess.run(["rm", in_txt_path, in_txt_path_iso, out_pre_path, out_pre_path_utf8], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)

    return segs
def text_preprocess(text):
    """Normalise *text* with Cotovía and return it as a single string.

    The segments returned by ``to_cotovia`` are joined with single spaces and
    a final ``'.'`` is appended when the result does not already end in an
    ASCII punctuation character.

    Args:
        text: Raw input text.

    Returns:
        The normalised text, or an empty string when ``to_cotovia`` produced
        no output.
    """
    segments = to_cotovia(text)
    # convert list to string
    normalised = ' '.join(segments)
    # to_cotovia returns [] when the Cotovía output could not be read; indexing
    # the resulting empty string with [-1] would raise IndexError.
    if not normalised:
        return normalised
    # add final punctuation if missing
    if normalised[-1] not in string.punctuation:
        normalised += '.'
    return normalised
def main():
    """Command-line entry point: normalise a text and synthesize it to a WAV file.

    Positional arguments: the text, the model checkpoint path and the model
    config path. The audio is written to ``<first word of the text>.wav`` in
    the current directory, or ``audio.wav`` when no usable first word exists.

    Raises:
        SystemExit: If argument parsing fails or preprocessing yields no text.
    """
    parser = argparse.ArgumentParser(description='Cotovía text normalisation')
    parser.add_argument('text', type=str, help='Text to synthetize')
    parser.add_argument('model_path', type=str, help='Absolute path to the model checkpoint.pth')
    parser.add_argument('config_path', type=str, help='Absolute path to the model config.json')
    args = parser.parse_args()

    print("Text before preprocessing: ", args.text)
    text = text_preprocess(args.text)
    print("Text after preprocessing: ", text)
    if not text:
        # Nothing to synthesize; stop before loading the model.
        raise SystemExit("ERROR: preprocessing produced no text to synthesize")

    # The four None arguments are passed positionally after the checkpoint and
    # config paths -- presumably speaker/language files and vocoder settings;
    # confirm against the installed TTS version.
    synthesizer = Synthesizer(
        args.model_path, args.config_path, None, None, None, None,
    )

    # Step 1: Derive the output name from the first word of the raw text
    words = args.text.split()
    first_word = sanitize_filename(words[0]) if words else ""
    if not first_word:
        # Empty text, or a first word made only of characters that are
        # stripped by sanitize_filename (e.g. "¡¡¡"), would give ".wav".
        first_word = "audio"

    # Step 2: Use synthesizer's built-in function to synthesize and save the audio
    wavs = synthesizer.tts(text)
    filename = f"{first_word}.wav"
    synthesizer.save_wav(wavs, filename)
    print(f"Audio file saved as: {filename}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()