KingNish committed on
Commit
39f7f02
1 Parent(s): 3da058c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -53
app.py CHANGED
@@ -1,98 +1,115 @@
1
- import os
2
- import re
3
  import gradio as gr
 
 
 
4
  import edge_tts
5
  import asyncio
6
- import time
7
  import tempfile
 
 
8
  from huggingface_hub import InferenceClient
 
 
 
9
 
10
- DESCRIPTION = """ # <center><b>JARVIS⚡</b></center>
11
- ### <center>A personal Assistant of Tony Stark for YOU
12
- ### <center>Currently It supports text input, But If this space completes 1k hearts than I starts working on Audio Input.</center>
13
- """
14
-
15
- MORE = """ ## TRY Other Models
16
- ### Instant Video: Create Amazing Videos in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Video
17
- ### Instant Image: 4k images in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Image
18
- """
19
 
20
- Fast = """## Fastest Model"""
21
 
22
- Complex = """## Best in Complex Question"""
23
 
24
- Detail = """## Best for Detailed Generation or Long Answers"""
 
 
 
 
25
 
26
  client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
27
 
28
  system_instructions1 = "<s>[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Jarvis, made by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
29
 
30
- async def generate1(prompt):
31
  generate_kwargs = dict(
32
- temperature=0.6,
33
- max_new_tokens=256,
34
  top_p=0.95,
35
  repetition_penalty=1,
36
  do_sample=True,
37
  seed=42,
38
  )
39
- formatted_prompt = system_instructions1 + prompt + "[JARVIS]"
 
40
  stream = client1.text_generation(
41
- formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
42
  output = ""
43
  for response in stream:
44
  if not response.token.text == "</s>":
45
  output += response.token.text
46
 
47
- communicate = edge_tts.Communicate(output)
48
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
49
- tmp_path = tmp_file.name
50
- await communicate.save(tmp_path)
51
- yield tmp_path
52
 
53
- client2 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
 
 
 
 
 
 
54
 
55
- system_instructions2 = "[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Must answer in friendly style and Easy Manner. You can answer Complex Questions. Do not say who are you or Hi, Hello, Just Start answering. Stop, as answer ends. [USER]"
56
 
57
- async def generate2(prompt):
58
- generate_kwargs = dict(
59
- temperature=0.6,
60
- max_new_tokens=512,
61
- top_p=0.95,
62
- repetition_penalty=1,
63
- do_sample=True,
64
- )
65
- formatted_prompt = system_instructions2 + prompt + "[ASSISTANT]"
66
- stream = client2.text_generation(
67
- formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
68
- output = ""
69
- for response in stream:
70
- output += response.token.text
71
 
72
- communicate = edge_tts.Communicate(output)
 
 
 
 
73
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
74
  tmp_path = tmp_file.name
75
  await communicate.save(tmp_path)
76
  yield tmp_path
77
 
78
- client3 = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- system_instructions3 = "[SYSTEM]The text provided is a request for a specific type of response from me, the virtual assistant. The request asks me to provide detailed and friendly responses as if I am the character Jarvis, inspired by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, my task is to generate responses that appear as if they are created by the user and not an AI-powered assistant.[USER]"
81
 
82
- async def generate3(prompt):
83
  generate_kwargs = dict(
84
- temperature=0.6,
85
- max_new_tokens=2048,
86
  top_p=0.95,
87
  repetition_penalty=1,
88
- do_sample=True,
89
- )
90
- formatted_prompt = system_instructions3 + prompt + "[ASSISTANT]"
91
- stream = client3.text_generation(
92
  formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
93
  output = ""
94
  for response in stream:
95
- output += response.token.text
 
96
 
97
  communicate = edge_tts.Communicate(output)
98
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
@@ -102,6 +119,17 @@ async def generate3(prompt):
102
 
103
  with gr.Blocks(css="style.css") as demo:
104
  gr.Markdown(DESCRIPTION)
 
 
 
 
 
 
 
 
 
 
 
105
  with gr.Row():
106
  user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
107
  input_text = gr.Textbox(label="Input Text", elem_id="important")
@@ -113,7 +141,7 @@ with gr.Blocks(css="style.css") as demo:
113
  translate_btn = gr.Button("Response")
114
  translate_btn.click(fn=generate1, inputs=user_input,
115
  outputs=output_audio, api_name="translate")
116
-
117
  gr.Markdown(MORE)
118
 
119
  if __name__ == "__main__":
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ import requests
4
+ import json
5
  import edge_tts
6
  import asyncio
 
7
  import tempfile
8
+ import os
9
+ import inflect
10
  from huggingface_hub import InferenceClient
11
+ import re
12
+ import time
13
+ from streaming_stt_nemo import Model
14
 
15
+ number_to_word = inflect.engine()
 
 
 
 
 
 
 
 
16
 
17
+ default_lang = "en"
18
 
19
+ engines = { default_lang: Model(default_lang) }
20
 
21
+ def transcribe(audio):
22
+ lang = "en"
23
+ model = engines[lang]
24
+ text = model.stt_file(audio)[0]
25
+ return text
26
 
27
  client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
28
 
29
  system_instructions1 = "<s>[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Jarvis, made by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
30
 
31
+ def model(text):
32
  generate_kwargs = dict(
33
+ temperature=0.7,
34
+ max_new_tokens=512,
35
  top_p=0.95,
36
  repetition_penalty=1,
37
  do_sample=True,
38
  seed=42,
39
  )
40
+
41
+ formatted_prompt = system_instructions1 + text + "[JARVIS]"
42
  stream = client1.text_generation(
43
+ formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
44
  output = ""
45
  for response in stream:
46
  if not response.token.text == "</s>":
47
  output += response.token.text
48
 
49
+ return output
 
 
 
 
50
 
51
+ def number_to_words(str):
52
+ words = str.split(' ')
53
+ result = []
54
+
55
+ for word in words:
56
+ if( any(char.isdigit() for char in word) ):
57
+ word = number_to_word.number_to_words(word)
58
 
59
+ result.append(word)
60
 
61
+ final_result = ' '.join(result).replace('point', '')
62
+ return final_result
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ async def respond(audio):
65
+ user = transcribe(audio)
66
+ reply = model(user)
67
+ reply2 = number_to_words(reply)
68
+ communicate = edge_tts.Communicate(reply2)
69
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
70
  tmp_path = tmp_file.name
71
  await communicate.save(tmp_path)
72
  yield tmp_path
73
 
74
+ DESCRIPTION = """ # <center><b>JARVIS⚡</b></center>
75
+ ### <center>A personal Assistant of Tony Stark for YOU
76
+ ### <center>Voice Chat with your personal Assistant</center>
77
+ """
78
+
79
+ MORE = """ ## TRY Other Models
80
+ ### Instant Video: Create Amazing Videos in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Video
81
+ ### Instant Image: 4k images in 5 Second -> https://huggingface.co/spaces/KingNish/Instant-Image
82
+ """
83
+
84
+ BETA = """ ### Voice Chat (BETA)"""
85
+
86
+ FAST = """## Fastest Model"""
87
+
88
+ Complex = """## Best in Complex Question"""
89
+
90
+ Detail = """## Best for Detailed Generation or Long Answers"""
91
+
92
+ base_loaded = "mistralai/Mixtral-8x7B-Instruct-v0.1"
93
+
94
+ client1 = InferenceClient(base_loaded)
95
 
96
+ system_instructions1 = "[SYSTEM] Answer as Real Jarvis JARVIS, Made by 'Tony Stark', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. The request asks you to provide friendly responses as if You are the character Jarvis, made by 'Tony Stark.' The expectation is that I will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
97
 
98
+ async def generate1(prompt):
99
  generate_kwargs = dict(
100
+ temperature=0.7,
101
+ max_new_tokens=512,
102
  top_p=0.95,
103
  repetition_penalty=1,
104
+ do_sample=False,
105
+ )
106
+ formatted_prompt = system_instructions1 + prompt + "[JARVIS]"
107
+ stream = client1.text_generation(
108
  formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
109
  output = ""
110
  for response in stream:
111
+ if not response.token.text == "</s>":
112
+ output += response.token.text
113
 
114
  communicate = edge_tts.Communicate(output)
115
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
 
119
 
120
  with gr.Blocks(css="style.css") as demo:
121
  gr.Markdown(DESCRIPTION)
122
+ with gr.Row():
123
+ user_input = gr.Audio(label="Voice Chat (BETA)".type="filepath")
124
+ output_audio = gr.Audio(label="JARVIS", type="filepath",
125
+ interactive=False,
126
+ autoplay=True,
127
+ elem_classes="audio")
128
+ with gr.Row():
129
+ translate_btn = gr.Button("Response")
130
+ translate_btn.click(fn=respond, inputs=user_input,
131
+ outputs=output_audio, api_name=False)
132
+ gr.Markdown(FAST)
133
  with gr.Row():
134
  user_input = gr.Textbox(label="Prompt", value="What is Wikipedia")
135
  input_text = gr.Textbox(label="Input Text", elem_id="important")
 
141
  translate_btn = gr.Button("Response")
142
  translate_btn.click(fn=generate1, inputs=user_input,
143
  outputs=output_audio, api_name="translate")
144
+
145
  gr.Markdown(MORE)
146
 
147
  if __name__ == "__main__":