TheUpperCaseGuy committed on
Commit
235ca61
·
1 Parent(s): 70d9a68

gradio interface created

Browse files
Files changed (2) hide show
  1. app.py +119 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan
3
+ import soundfile as sf
4
+ import gradio as gr
5
+ import scipy.io.wavfile as wav
6
+ import numpy as np
7
+ import wave
8
+ from datasets import load_dataset, Audio, config
9
+ from IPython.display import Audio
10
+
11
# Hugging Face Hub id of the fine-tuned SpeechT5 text-to-speech checkpoint.
checkpoint = "TheUpperCaseGuy/Guy-Urdu-TTS"  # Replace with your actual model name

# The acoustic model and its processor come from the same checkpoint; the
# processor's tokenizer is also exposed under its own module-level name.
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
processor = SpeechT5Processor.from_pretrained(checkpoint)
tokenizer = processor.tokenizer

# HiFi-GAN vocoder handed to model.generate_speech() by generate_audio().
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
17
+
18
+
19
# Urdu (Arabic-script) code point -> Roman spelling.
# NOTE: despite its name, this table maps *from* Unicode *to* Roman letters.
# Several Urdu letters share one Roman spelling, so the mapping is lossy.
buck2uni = {
    "\u0627": "A",
    "\u0675": "A",
    "\u0673": "A",
    "\u0622": "AA",
    "\u0628": "B",
    "\u067E": "P",
    "\u062A": "T",
    "\u0637": "T",
    "\u0679": "T",
    "\u062C": "J",
    "\u0633": "S",
    "\u062B": "S",
    "\u0635": "S",
    "\u0686": "CH",
    "\u062D": "H",
    "\u0647": "H",
    "\u0629": "H",
    "\u06DF": "H",
    "\u062E": "KH",
    "\u062F": "D",
    "\u0688": "D",
    "\u0630": "Z",
    "\u0632": "Z",
    "\u0636": "Z",
    "\u0638": "Z",
    "\u068E": "Z",
    "\u0631": "R",
    "\u0691": "R",
    "\u0634": "SH",
    "\u063A": "GH",
    "\u0641": "F",
    "\u06A9": "K",
    "\u0642": "K",
    "\u06AF": "G",
    "\u0644": "L",
    "\u0645": "M",
    "\u0646": "N",
    "\u06BA": "N",
    "\u0648": "O",
    "\u0649": "Y",
    "\u0626": "Y",
    "\u06CC": "Y",
    "\u06D2": "E",
    "\u06C1": "H",
    "\u064A": "E",
    "\u06C2": "AH",
    "\u06BE": "H",
    "\u0639": "A",
    "\u0643": "K",
    "\u0621": "A",
    "\u0624": "O",
    "\u060C": "",  # Arabic comma (separator) is dropped from the output
}


def transString(string, reverse=0):
    """Transliterate between Urdu script and its Roman spelling.

    Args:
        string: Text to convert. Characters that are not in ``buck2uni``
            are passed through unchanged.
        reverse: Falsy (default) converts Urdu script to Roman letters.
            Truthy converts Roman letters back to Urdu script.

    Returns:
        The converted string.

    The reverse direction is best-effort only: ``buck2uni`` maps several
    letters to the same Roman spelling, so each spelling is turned back into
    the first letter listed for it, and characters that were dropped in the
    forward direction cannot be restored.
    """
    if not reverse:
        # Every key is a single code point and no Roman spelling contains a
        # key, so one translate() pass gives the same result as replacing
        # the keys one after another.
        return string.translate(str.maketrans(buck2uni))

    # Keep the first-listed letter for every Roman spelling. Empty spellings
    # are skipped: str.replace("", x) would insert x between all characters.
    first_letter = {}
    for letter, roman in buck2uni.items():
        if roman:
            first_letter.setdefault(roman, letter)

    # Longest spellings first so digraphs such as "KH" are matched before
    # their single-letter parts "K" and "H".
    for roman in sorted(first_letter, key=len, reverse=True):
        string = string.replace(roman, first_letter[roman])
    return string
86
+
87
+
88
def generate_audio(text):
    """Synthesize speech for Urdu ``text``.

    The text is romanized with ``transString``, tokenized by the module-level
    ``processor`` and passed to ``model.generate_speech`` together with the
    speaker embedding stored in ``speaker_embeddings.npy`` and the
    module-level ``vocoder``.

    Args:
        text: Urdu text in Arabic script.

    Returns:
        The value returned by ``model.generate_speech`` (the caller in this
        file converts it with ``.numpy()``).
    """
    # Convert input text to Roman Urdu
    roman_urdu = transString(text)

    # Tokenize the romanized text. Only ``text`` and ``return_tensors`` are
    # passed: ``type`` is not an option of the processor or its tokenizer.
    inputs = processor(text=roman_urdu, return_tensors="pt")

    # Clip over-long input to the number of text positions the model
    # supports instead of letting generation fail on it.
    input_ids = inputs["input_ids"][..., : model.config.max_text_positions]

    # Generate audio from the SpeechT5 model
    speaker_embeddings = torch.tensor(np.load("speaker_embeddings.npy"))
    return model.generate_speech(input_ids, speaker_embeddings, vocoder=vocoder)
100
+
101
def text_to_speech(text):
    """Synthesize ``text`` and save it as a 16 kHz, 16-bit PCM WAV file.

    Args:
        text: Urdu text in Arabic script, forwarded to ``generate_audio``.

    Returns:
        Path of the WAV file that was written.
    """
    import tempfile  # local import: only this function needs it

    # Generate audio
    audio_output = generate_audio(text)

    # Reserve a unique file per call. A fixed "output.wav" would be
    # overwritten when two requests are handled at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name

    sf.write(output_path, audio_output.numpy(), 16000, "PCM_16")

    return output_path
109
+
110
+
111
# Sample inputs offered in the web UI (one single-field row per example).
examples = [
    ['میں ٹھیک ہوں، شکریہ! اور آپ؟'],
    ['آپ سَے ملکر خوشی ہوًی!'],
]


# Text box in, audio player out; text_to_speech() returns the WAV file path.
interface = gr.Interface(
    fn=text_to_speech,
    inputs="text",
    outputs="audio",
    verbose=True,
    title="Urdu TTS",
    description="A simple Urdu Text to Speech Application. It is not by any means perfect and will not work for all text. You can sometimes expect it to generate random noise on an input of your choice. Right now it works successfully on very basic urdu text, such the ones in the example.",
    examples=examples,
)
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ soundfile
4
+ gradio
5
+ torchaudio
6
+ SentencePiece
7
+ speechbrain
8
+ wave
9
+ datasets
10
+ IPython