SivaResearch committed on
Commit 6c4d912 · verified · 1 Parent(s): b28488a

Update app_1.py

Files changed (1): app_1.py (+22 -229)
app_1.py CHANGED
@@ -1,32 +1,30 @@
-import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import speech_recognition as sr
-from gtts import gTTS
-from pydub import AudioSegment
-import io
+import gradio as gr

 device = "cuda" if torch.cuda.is_available() else "cpu"

+
 def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
     formatted_text = ""
     for message in messages:
         if message["role"] == "system":
-            formatted_text += "\n" + message["content"] + "\n"
+            formatted_text += "<|system|>\n" + message["content"] + "\n"
         elif message["role"] == "user":
-            formatted_text += "\n" + message["content"] + "\n"
+            formatted_text += "<|user|>\n" + message["content"] + "\n"
         elif message["role"] == "assistant":
-            formatted_text += "\n" + message["content"].strip() + eos + "\n"
+            formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
         else:
             raise ValueError(
-                "Tulu chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
+                "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
                     message["role"]
                 )
             )
-    formatted_text += "\n"
+    formatted_text += "<|assistant|>\n"
     formatted_text = bos + formatted_text if add_bos else formatted_text
     return formatted_text

+
 def inference(input_prompts, model, tokenizer):
     input_prompts = [
         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
@@ -36,8 +34,8 @@ def inference(input_prompts, model, tokenizer):
     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
     encodings = encodings.to(device)

-    with torch.no_grad():
-        outputs = model.generate(encodings.input_ids, do_sample=False, max_length=250)
+    with torch.inference_mode():
+        outputs = model.generate(encodings.input_ids, do_sample=False, max_new_tokens=250)

     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)

@@ -47,226 +45,21 @@ def inference(input_prompts, model, tokenizer):
     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
     return output_texts

-def recognize_speech():
-    recognizer = sr.Recognizer()
-    microphone = sr.Microphone()
-
-    with microphone as source:
-        print("Listening...")
-        recognizer.adjust_for_ambient_noise(source)
-        audio_data = recognizer.listen(source, timeout=5)
-
-    try:
-        print("Recognizing...")
-        text = recognizer.recognize_google(audio_data, language="hi-IN")
-        return text
-    except sr.UnknownValueError:
-        print("Speech Recognition could not understand audio.")
-        return ""
-    except sr.RequestError as e:
-        print(f"Could not request results from Google Speech Recognition service; {e}")
-        return ""
-
-def text_to_speech(text):
-    tts = gTTS(text=text, lang="hi")
-    audio_stream = io.BytesIO()
-    tts.save(audio_stream)
-    audio = AudioSegment.from_file(io.BytesIO(audio_stream.read()), format="mp3")
-    return audio
-
-def respond_to_input(input_text):
-    output_texts = inference([input_text], model, tokenizer)
-    output_text = output_texts[0]
-    output_audio = text_to_speech(output_text)
-    return output_text, output_audio.export(format="wav")
-
-examples = [
-    ["मुझे अपने करियर के बारे में सुझाव दो", "मैं कैसे अध्ययन कर सकता हूँ?"],
-    ["कृपया मुझे एक कहानी सुनाएं", "ताजमहल के बारे में कुछ बताएं"],
-    ["मेरा नाम क्या है?", "आपका पसंदीदा फिल्म कौन सी है?"],
+
+model_name = "ai4bharat/Airavata"
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
+tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
+
+examples= [
+    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं।",
+    "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं और उनका वर्णन करें।",
 ]
-
-iface = gr.Interface(
-    fn=respond_to_input,
-    inputs=["text", "microphone"],
-    outputs=["text", "audio"],
-    live=True,
-    examples=examples,
-    title="CAMAI",
-    description="Type or speak to me, and I'll generate a response!",
-    theme="light",
-)
-
-iface.launch()
-
-
-
-
-
-
-###############################################################################################################################
-# import torch
-# from transformers import AutoTokenizer, AutoModelForCausalLM
-# import gradio as gr
-
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-
-
-# def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
-#     formatted_text = ""
-#     for message in messages:
-#         if message["role"] == "system":
-#             formatted_text += "<|system|>\n" + message["content"] + "\n"
-#         elif message["role"] == "user":
-#             formatted_text += "<|user|>\n" + message["content"] + "\n"
-#         elif message["role"] == "assistant":
-#             formatted_text += "<|assistant|>\n" + message["content"].strip() + eos + "\n"
-#         else:
-#             raise ValueError(
-#                 "Tulu chat template only supports 'system', 'user' and 'assistant' roles. Invalid role: {}.".format(
-#                     message["role"]
-#                 )
-#             )
-#     formatted_text += "<|assistant|>\n"
-#     formatted_text = bos + formatted_text if add_bos else formatted_text
-#     return formatted_text
-
-
-# def inference(input_prompts, model, tokenizer):
-#     input_prompts = [
-#         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
-#         for input_prompt in input_prompts
-#     ]
-
-#     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
-#     encodings = encodings.to(device)
-
-#     with torch.inference_mode():
-#         outputs = model.generate(encodings.input_ids, do_sample=False, max_new_tokens=250)
-
-#     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
-
-#     input_prompts = [
-#         tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
-#     ]
-#     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
-#     return output_texts
-
-
-# model_name = "ai4bharat/Airavata"
-
-# tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
-# tokenizer.pad_token = tokenizer.eos_token
-# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
-
-# def respond_to_text(input_text):
-#     outputs = inference([input_text], model, tokenizer)
-#     return outputs[0]
-
-
-# input_prompts = [
-#     "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं।",
-#     "मैं अपने समय प्रबंधन कौशल को कैसे सुधार सकता हूँ? मुझे पांच बिंदु बताएं और उनका वर्णन करें।",
-# ]
-# iface = gr.Interface(fn=respond_to_text, inputs="text", outputs="text")
-# iface.launch()
-########################################################################################
-
-# import gradio as gr
-# from transformers import AutoTokenizer, AutoModelForCausalLM
-
-# tokenizer = AutoTokenizer.from_pretrained("ai4bharat/Airavata")
-# model = AutoModelForCausalLM.from_pretrained("ai4bharat/Airavata")
-
-# def generate_response(prompt):
-#     input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=50)
-#     output_ids = model.generate(input_ids, max_length=100, num_beams=5, no_repeat_ngram_size=2)
-#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-#     return response
-
-# iface = gr.Interface(
-#     fn=generate_response,
-#     inputs="text",
-#     outputs="text",
-#     live=True,
-#     title="Airavata LLMs Chatbot",
-#     description="Ask me anything, and I'll generate a response!",
-#     theme="light",
-# )
-
-# iface.launch()
-
-
-
-
-
-
-
-
-
-
-
-# import gradio as gr
-# import torch
-# from transformers import AutoTokenizer, AutoModelForCausalLM
-
-# device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# def create_prompt_with_chat_format(messages, bos="<s>", eos="</s>", add_bos=True):
-#     formatted_text = ""
-#     for message in messages:
-#         if message["role"] == "system":
-#             formatted_text += "\n" + message["content"] + "\n"
-#         elif message["role"] == "user":
-#             formatted_text += "\n" + message["content"] + "\n"
-#         elif message["role"] == "assistant":
-#             formatted_text += "\n" + message["content"].strip() + eos + "\n"
-#         else:
-#             raise ValueError(
-#                 "Tulu chat template only supports 'system', 'user', and 'assistant' roles. Invalid role: {}.".format(
-#                     message["role"]
-#                 )
-#             )
-#     formatted_text += "\n"
-#     formatted_text = bos + formatted_text if add_bos else formatted_text
-#     return formatted_text
-
-# def inference(input_prompts, model, tokenizer):
-#     input_prompts = [
-#         create_prompt_with_chat_format([{"role": "user", "content": input_prompt}], add_bos=False)
-#         for input_prompt in input_prompts
-#     ]
-
-#     encodings = tokenizer(input_prompts, padding=True, return_tensors="pt")
-#     encodings = encodings.to(device)
-
-#     with torch.no_grad():
-#         outputs = model.generate(encodings.input_ids, do_sample=False, max_length=250)
-
-#     output_texts = tokenizer.batch_decode(outputs.detach(), skip_special_tokens=True)
-
-#     input_prompts = [
-#         tokenizer.decode(tokenizer.encode(input_prompt), skip_special_tokens=True) for input_prompt in input_prompts
-#     ]
-#     output_texts = [output_text[len(input_prompt) :] for input_prompt, output_text in zip(input_prompts, output_texts)]
-#     return output_texts
-
-# model_name = "ai4bharat/Airavata"
-# tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
-# tokenizer.pad_token = tokenizer.eos_token
-# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)
-# examples = [
-#     ["मुझे अपने करियर के बारे में सुझाव दो", "मैं कैसे अध्ययन कर सकता हूँ?"],
-#     ["कृपया मुझे एक कहानी सुनाएं", "ताजमहल के बारे में कुछ बताएं"],
-#     ["मेरा नाम क्या है?", "आपका पसंदीदा फिल्म कौन सी है?"],
-# ]
-
-# iface = gr.Chat(
-#     model_fn=lambda input_prompts: inference(input_prompts, model, tokenizer),
-#     inputs=["text"],
-#     outputs="text",
-#     examples=examples,
-#     title="Airavata Chatbot",
-#     theme="light",  # Optional: Set a light theme
-# )
+# outputs = inference(input_prompts, model, tokenizer)
+# print(outputs)

-# iface.launch()
+gr.ChatInterface(fn=inference,
+                 examples = examples,
+                 title = "CAMAI ChatBot").launch()
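Note: gr.ChatInterface calls its fn with the latest user message and the chat history, i.e. fn(message, history), while inference as committed expects (input_prompts, model, tokenizer), so the interface will likely fail on the first chat turn. A small adapter along the following lines would be needed; chat_fn is a hypothetical name, not part of this commit:

def chat_fn(message, history):
    # History is ignored: inference() builds a fresh single-turn prompt each time.
    return inference([message], model, tokenizer)[0]

gr.ChatInterface(fn=chat_fn, examples=examples, title="CAMAI ChatBot").launch()

Passing attention_mask=encodings.attention_mask to model.generate would also be advisable for padded batches, since only input_ids is forwarded at the moment.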