# This file needs to be run in the main folder
# %%
import text
from utils import read_lines_from_file


def write_lines_to_file(path, lines, mode='w', encoding='utf-8'):
    """Write ``lines`` to ``path`` separated by newlines, without a trailing one.

    Args:
        path: Destination file path.
        lines: Iterable of strings; each becomes one line of the file.
        mode: Mode passed to ``open`` (the default ``'w'`` overwrites).
        encoding: Text encoding passed to ``open``.
    """
    with open(path, mode, encoding=encoding) as f:
        # join() puts '\n' only *between* entries, so the last line stays
        # unterminated and an empty input produces an empty file.
        f.write('\n'.join(lines))


# %%
lines = read_lines_from_file(r'C:\Users\ni-user\Downloads\New folder\tts-arabic-pytorch\datatext.txt')
#lines = read_lines_from_file('./data/test-orthographic-transcript.txt')

new_lines_arabic = []
new_lines_phonetic = []
new_lines_buckw = []

for line in lines:
    # Each transcript line is expected to look like: "<wav_name>" "<utterance>"
    wav_name, utterance = line.split('" "')
    # Drop the opening quote of the first field and the closing quote of the second.
    wav_name, utterance = wav_name[1:], utterance[:-1]
    # Put '~' in front of the vowel it follows and collapse ' - ' separators.
    utterance = (
        utterance.replace("a~", "~a")
        .replace("i~", "~i")
        .replace("u~", "~u")
        .replace(" - ", " ")
    )

    # NOTE(review): `utterance_arab` holds the result of arabic_to_buckwalter,
    # while the unconverted `utterance` is what ends up in the *_buckw file.
    # The arab/buckw labels may be swapped -- confirm against the input script.
    utterance_arab = text.arabic_to_buckwalter(utterance)
    utterance_phon = text.arabic_to_phonemes(utterance)

    line_new_ara = f'"{wav_name}" "{utterance_arab}"'
    new_lines_arabic.append(line_new_ara)

    line_new_pho = f'"{wav_name}" "{utterance_phon}"'
    new_lines_phonetic.append(line_new_pho)

    line_new_buckw = f'"{wav_name}" "{utterance}"'
    # Previously appended to new_lines_arabic, which left the buckw output
    # empty and doubled the entries in the arabic output.
    new_lines_buckw.append(line_new_buckw)


# %% train
write_lines_to_file('./data/SA/train_arab.txt', new_lines_arabic)
write_lines_to_file('./data/SA/train_phon.txt', new_lines_phonetic)
write_lines_to_file('./data/SA/train_buckw.txt', new_lines_buckw)

# %% test
# write_lines_to_file('./data/test_arab.txt', new_lines_arabic)
# write_lines_to_file('./data/test_phon.txt', new_lines_phonetic)
# write_lines_to_file('./data/test_buckw.txt', new_lines_buckw)