add preprocessing
- README.md +22 -21
- preprocess.py +255 -0

README.md CHANGED
@@ -21,7 +21,7 @@ This model was trained from scratch using the [Coqui TTS](https://github.com/coq
 
 A live inference demo can be found in our official page, [here](https://tts.nos.gal/).
 
-This model was trained using graphemes
+This model was trained using graphemes. The input text must first be preprocessed with the [Cotovía](http://gtm.uvigo.es/en/transfer/software/cotovia/) tool.
 
 ## Intended uses and limitations
 
@@ -30,38 +30,39 @@ You can use this model to generate synthetic speech in Galician.
 ## How to use
 ### Usage
 
+#### Cotovía preprocessor
+
+To generate phonetic transcriptions, the Cotovía tool is needed. It can be downloaded from [SourceForge](https://sourceforge.net/projects/cotovia/files/Debian%20packages/). The required Debian packages are `cotovia_0.5_amd64.deb` and `cotovia-lang-gl_0.5_all.deb`, which can be installed with the following commands:
+
+```bash
+sudo dpkg -i cotovia_0.5_amd64.deb
+sudo dpkg -i cotovia-lang-gl_0.5_all.deb
+```
+
+The following command generates the phonetic transcription of a text string:
+
+```bash
+echo "Era unha avioneta... O piloto era pequeno, que se chega a ser dos grandes, tómbate!" | cotovia -p -n -S | iconv -f iso88591 -t utf8
+```
+
+The command outputs the phonetic transcription of the input text, which can then be used for inference, as shown next.
+
 Required libraries:
 
 ```bash
 pip install TTS
 ```
 
-Synthesize
+Synthesize speech using Python and the `preprocess.py` script, available in this repository:
 
 ```bash
-
-import numpy as np
-import os
-import json
-
-from typing import Optional
-from TTS.config import load_config
-from TTS.utils.manage import ModelManager
-from TTS.utils.synthesizer import Synthesizer
-model_path = # Absolute path to the model checkpoint.pth
-config_path = # Absolute path to the model config.json
-text = "Text to synthetize"
-synthesizer = Synthesizer(
-    model_path, config_path, None, None, None, None,
-)
-wavs = synthesizer.tts(text)
+python preprocess.py text model_path config_path
 ```
 
+This script takes a text input, preprocesses it with the Cotovía tool, synthesizes speech from the preprocessed text, and saves the output as a .wav file.
 
-## Training
-### Training Procedure
-### Data preparation
 
+## Training
 
 ### Hyperparameter
 
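For readers who want to script the Cotovía step shown in the README diff above rather than run it by hand, the sketch below wraps the same `cotovia -p -n -S` pipeline in Python. It is a minimal illustration, not part of this commit: the ISO-8859-1 encode/decode round-trip mirrors the `iconv` conversions used elsewhere in the commit, and `cotovia` is assumed to be on the PATH.

```python
# Minimal sketch (not part of this commit): run the Cotovía pipeline from Python.
# Mirrors: echo "..." | cotovia -p -n -S | iconv -f iso88591 -t utf8
import subprocess


def cotovia_transcribe(text: str) -> str:
    # Cotovía works in ISO-8859-1, so encode the input and decode the output
    # accordingly (the same conversions preprocess.py performs with iconv).
    result = subprocess.run(
        ["cotovia", "-p", "-n", "-S"],
        input=text.encode("iso8859-1"),
        capture_output=True,
    )
    return result.stdout.decode("iso8859-1").strip()


if __name__ == "__main__":
    print(cotovia_transcribe("Era unha avioneta... O piloto era pequeno."))
```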
preprocess.py ADDED
@@ -0,0 +1,255 @@
+import argparse
+import tempfile
+import random
+import re
+import string
+import subprocess
+from typing import Optional
+from TTS.config import load_config
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+
+
+PUNCLIST = [';', '?', '¿', ',', ':', '.', '!', '¡']
+
+
+def canBeNumber(n):
+    try:
+        int(n)
+        return True
+    except ValueError:
+        # Not a number
+        return False
+
+def accent_convert(phontrans):
+    transcript = re.sub('a\^','á',phontrans)
+    transcript = re.sub('e\^','é',transcript)
+    transcript = re.sub('i\^','í',transcript)
+    transcript = re.sub('o\^','ó',transcript)
+    transcript = re.sub('u\^','ú',transcript)
+    transcript = re.sub('E\^','É',transcript)
+    transcript = re.sub('O\^','Ó',transcript)
+    return transcript
+
+def remove_tra3_tags(phontrans):
+    s = re.sub(r'#(.+?)#', r'', phontrans)
+    s = re.sub(r'%(.+?)%', r'', s)
+    s = re.sub(' +',' ',s)
+    s = re.sub('-','',s)
+    return s.strip()
+
+def sanitize_filename(filename):
+    """Remove or replace any characters that are not allowed in file names."""
+    return ''.join(c for c in filename if c.isalnum() or c in (' ', '_', '-')).rstrip()
+
+def is_number(index, text):
+    if index == 0:
+        return False
+    elif index == len(text) - 1:
+        return False
+    else:
+        return canBeNumber(text[index - 1]) and canBeNumber(text[index + 1])
+
+# Splits text from punctuation marks, gives list of segments in between and the punctuation marks. Skips punctuation not present in training.
+def split_punc(text):
+    segments = []
+    puncs = []
+    curr_seg = ""
+    previous_punc = False
+    for i, c in enumerate(text):
+        if c in PUNCLIST and not previous_punc and not is_number(i, text):
+            curr_seg += c
+            segments.append(curr_seg.strip())
+            puncs.append(c)
+            curr_seg = ""
+            previous_punc = True
+        elif c in PUNCLIST and previous_punc:
+            curr_seg += c
+            puncs[-1] += c
+        else:
+            curr_seg += c
+            previous_punc = False
+
+    segments.append(curr_seg.strip())
+
+    # print("Split Segments: ", segments)
+
+    # Remove empty segments in the list
+    segments = filter(None, segments)
+
+    # store segments as a list
+    segments = list(segments)
+
+    # print("Split Segments: ", segments)
+    # print("Split Puncs: ", puncs)
+
+    return segments, puncs
+
+def merge_punc(text_segs, puncs):
+    merged_str = ""
+    # print("Text segs: ", text_segs)
+    # print("Puncs: ", puncs)
+    for i, seg in enumerate(text_segs):
+        merged_str += seg + " "
+
+        if i < len(puncs):
+            merged_str += puncs[i] + " "
+
+    # remove spaces before , . ! ? ; : ) ] of the merged string
+    merged_str = re.sub(r"\s+([.,!?;:)\]])", r"\1", merged_str)
+
+    # remove spaces after ( [ ¡ ¿ of the merged string
+    merged_str = re.sub(r"([\(\[¡¿])\s+", r"\1", merged_str)
+
+    # print("Merged str: ", merged_str)
+
+    return merged_str.strip()
+
+
+# function that adds the original punctuation back to Cotovía's number expansion (option p)
+def punctuate_p(str_ext):
+
+    # substitute ' ·\n' by ...
+    str_ext = re.sub(r" ·", r"...", str_ext)
+
+    # remove spaces before , . ! ? ; : ) ] of the extended string
+    str_ext = re.sub(r"\s+([.,!?;:)\]])", r"\1", str_ext)
+
+    # remove spaces after ( [ ¡ ¿ of the extended string
+    str_ext = re.sub(r"([\(\[¡¿])\s+", r"\1", str_ext)
+
+    # remove unwanted spaces between quotations marks
+    str_ext = re.sub(r'"\s*([^"]*?)\s*"', r'"\1"', str_ext)
+
+    # substitute '- text -' to '-text-'
+    str_ext = re.sub(r"-\s*([^-]*?)\s*-", r"-\1-", str_ext)
+
+    # remove initial question marks
+    str_ext = re.sub(r"[¿¡]", r"", str_ext)
+
+    # eliminate extra spaces
+    str_ext = re.sub(r"\s+", r" ", str_ext)
+
+    str_ext = re.sub(r"(\d+)\s*-\s*(\d+)", r"\1 \2", str_ext)
+
+    ### - , ' and () by commas
+    # substitute '- text -' to ', text,'
+    str_ext = re.sub(r"(\w+)\s+-([^-]*?)-\s+([^-]*?)", r"\1, \2, ", str_ext)
+
+    # substitute ' - ' by ', '
+    str_ext = re.sub(r"(\w+[!\?]?)\s+-\s*", r"\1, ", str_ext)
+
+    # substitute ' ( text )' to ', text,'
+    str_ext = re.sub(r"(\w+)\s*\(\s*([^\(\)]*?)\s*\)", r"\1, \2,", str_ext)
+
+
+    return str_ext
+
+
+def to_cotovia(text_segments):
+    # Input and output Cotovía files
+    res = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
+    COTOVIA_IN_TXT_PATH = res + '.txt'
+    COTOVIA_IN_TXT_PATH_ISO = 'iso8859-1' + res + '.txt'
+    COTOVIA_OUT_PRE_PATH = 'iso8859-1' + res + '.tra'
+    COTOVIA_OUT_PRE_PATH_UTF8 = 'utf8' + res + '.tra'
+
+
+    # print("Text segments: ", text_segments)
+    # Initial text preprocessing
+    # substitute ' M€' by 'millóns de euros' and 'somewordM€' by 'someword millóns de euros'
+    text_segments = [re.sub(r"(\w+)\s*M€", r"\1 millóns de euros", seg) for seg in text_segments]
+
+    # substitute ' €' by 'euros' and 'someword€' by 'someword euros'
+    text_segments = [re.sub(r"(\w+)\s*€", r"\1 euros", seg) for seg in text_segments]
+
+    # substitute ' ºC' by 'graos centígrados' and 'somewordºC' by 'someword graos centígrados'
+    text_segments = [re.sub(r"(\w+)\s*ºC", r"\1 graos centígrados", seg) for seg in text_segments]
+
+
+    text_segments = [subprocess.run(["sed", "-e", "s/₂//g", "-e", "s/⸺//g", "-e", "s/ //g", "-e", "s///g", "-e", "s/č/c/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g",
+                                     "-e", "s/ş/s/g", "-e", "s/Ž/Z/g", "-e", "s/ž/z/g", "-e", "s/ț/t/g", "-e", "s/ğ/g/g", "-e", "s/ș/s/g", "-e", "s/ş/s/g", "-e", "s/«//g", "-e", "s/»//g",
+                                     "-e", "s/<<//g", "-e", "s/>>//g", "-e", "s/“/\"/g", "-e", "s/”/'\"'/g", "-e", "s/\'//g", "-e", "s/‘//g", "-e", "s/’//g", "-e", "s/…//g",
+                                     "-e", "s/-/-/g", "-e", "s/–/-/g", "-e", "s/—/-/g", "-e", "s/―/-/g", "-e", "s/−/-/g", "-e", "s/‒/-/g", "-e", "s/─/-/g", "-e", "s/^Si$/Si\./g"],
+                                    input=seg, text=True, capture_output=True).stdout for seg in text_segments]
+
+    # print("Text segments after sed: ", text_segments)
+
+    with open(COTOVIA_IN_TXT_PATH, 'w') as f:
+        for seg in text_segments:
+            if seg:
+                f.write(seg + '\n')
+            else:
+                f.write(',' + '\n')
+
+    # utf-8 to iso8859-1
+    subprocess.run(["iconv", "-f", "utf-8", "-t", "iso8859-1", COTOVIA_IN_TXT_PATH, "-o", COTOVIA_IN_TXT_PATH_ISO], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+    # call cotovia with -t3 option
+    subprocess.run(["cotovia", "-i", COTOVIA_IN_TXT_PATH_ISO, "-t3", "-n"], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+    # iso8859-1 to utf-8
+    subprocess.run(["iconv", "-f", "iso8859-1", "-t", "utf-8", COTOVIA_OUT_PRE_PATH, "-o", COTOVIA_OUT_PRE_PATH_UTF8], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+
+    segs = []
+    try:
+        with open(COTOVIA_OUT_PRE_PATH_UTF8, 'r') as f:
+            segs = [line.rstrip() for line in f]
+            segs = [remove_tra3_tags(line) for line in segs]
+    except:
+        print("ERROR: Couldn't read cotovia output")
+
+    subprocess.run(["rm", COTOVIA_IN_TXT_PATH, COTOVIA_IN_TXT_PATH_ISO, COTOVIA_OUT_PRE_PATH, COTOVIA_OUT_PRE_PATH_UTF8], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
+
+    # print("Cotovia segments: ", segs)
+
+    return segs
+
+def text_preprocess(text):
+
+    # Split from punc
+    text_segments, puncs = split_punc(text)
+
+    cotovia_phon_segs = to_cotovia(text_segments)
+
+    cotovia_phon_str = merge_punc(cotovia_phon_segs, puncs)
+
+    phon_str = accent_convert(cotovia_phon_str)
+
+    # remove extra spaces
+    phon_str = re.sub(r"\s+", r" ", phon_str)
+
+    # add final punctuation mark if it is not present
+    if not re.match(r"[.!?]", phon_str[-1]):
+        phon_str = phon_str + "."
+
+    return phon_str
+
+def main():
+    parser = argparse.ArgumentParser(description='Cotovia phoneme transcription.')
+    parser.add_argument('text', type=str, help='Text to synthesize')
+    parser.add_argument('model_path', type=str, help='Absolute path to the model checkpoint.pth')
+    parser.add_argument('config_path', type=str, help='Absolute path to the model config.json')
+
+    args = parser.parse_args()
+
+    print("Text before preprocessing: ", args.text)
+    text = text_preprocess(args.text)
+    print("Text after preprocessing: ", text)
+
+    synthesizer = Synthesizer(
+        args.model_path, args.config_path, None, None, None, None,
+    )
+
+    # Step 1: Extract the first word from the text
+    first_word = args.text.split()[0] if args.text.split() else "audio"
+    first_word = sanitize_filename(first_word)  # Sanitize to make it a valid filename
+
+    # Step 2: Use synthesizer's built-in function to synthesize and save the audio
+    wavs = synthesizer.tts(text)
+    filename = f"{first_word}.wav"
+    synthesizer.save_wav(wavs, filename)
+
+    print(f"Audio file saved as: {filename}")
+
+if __name__ == "__main__":
+    main()
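The helpers above can also be reused from other Python code instead of going through the command line. The following is a small, hypothetical example (not part of the commit) that imports `text_preprocess` from `preprocess.py` and feeds the result to the same `Synthesizer` call used in `main()`; the checkpoint and config paths are placeholders.

```python
# Hypothetical reuse of preprocess.py from another script (not part of this commit).
# Assumes preprocess.py sits in the working directory and cotovia/TTS are installed.
from TTS.utils.synthesizer import Synthesizer

from preprocess import text_preprocess

model_path = "/path/to/checkpoint.pth"   # placeholder: model checkpoint
config_path = "/path/to/config.json"     # placeholder: model config

# Preprocess the Galician text with Cotovía, then synthesize and save it.
phonemes = text_preprocess("Era unha avioneta... O piloto era pequeno, que se chega a ser dos grandes, tómbate!")
synthesizer = Synthesizer(model_path, config_path, None, None, None, None)
wavs = synthesizer.tts(phonemes)
synthesizer.save_wav(wavs, "demo.wav")
```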