Uhhy committed on
Commit 506360c
1 Parent(s): dcab251

Update app.py

Files changed (1)
app.py  +11 -105
app.py CHANGED
@@ -8,8 +8,6 @@ import gradio as gr
 import os
 from spaces import GPU
 from dotenv import load_dotenv
-import torch
-from diffusers import DiffusionPipeline

 load_dotenv()

@@ -39,9 +37,11 @@ model_configs = [
     {"repo_id": "Ffftdtd5dtft/Qwen2-7B-Instruct-Q2_K-GGUF", "filename": "qwen2-7b-instruct-q2_k.gguf", "name": "Qwen2 7B Instruct"},
     {"repo_id": "Ffftdtd5dtft/starcoder2-3b-Q2_K-GGUF", "filename": "starcoder2-3b-q2_k.gguf", "name": "Starcoder2 3B"},
     {"repo_id": "Ffftdtd5dtft/Qwen2-1.5B-Instruct-Q2_K-GGUF", "filename": "qwen2-1.5b-instruct-q2_k.gguf", "name": "Qwen2 1.5B Instruct"},
+    {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-q2_k.gguf", "name": "Meta Llama 3.1-70B"},
     {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral Nemo Instruct 2407"},
     {"repo_id": "Ffftdtd5dtft/Hermes-3-Llama-3.1-8B-IQ1_S-GGUF", "filename": "hermes-3-llama-3.1-8b-iq1_s-imat.gguf", "name": "Hermes 3 Llama 3.1-8B"},
     {"repo_id": "Ffftdtd5dtft/Phi-3.5-mini-instruct-Q2_K-GGUF", "filename": "phi-3.5-mini-instruct-q2_k.gguf", "name": "Phi 3.5 Mini Instruct"},
+    {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Instruct-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-instruct-q2_k.gguf", "name": "Meta Llama 3.1-70B Instruct"},
     {"repo_id": "Ffftdtd5dtft/codegemma-2b-IQ1_S-GGUF", "filename": "codegemma-2b-iq1_s-imat.gguf", "name": "Codegemma 2B"},
     {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-IQ2_XXS-GGUF", "filename": "phi-3-mini-128k-instruct-iq2_xxs-imat.gguf", "name": "Phi 3 Mini 128K Instruct XXS"},
     {"repo_id": "Ffftdtd5dtft/TinyLlama-1.1B-Chat-v1.0-IQ1_S-GGUF", "filename": "tinyllama-1.1b-chat-v1.0-iq1_s-imat.gguf", "name": "TinyLlama 1.1B Chat"},
@@ -88,13 +88,7 @@ def remove_duplicates(text):
         seen_lines.add(line)
     return '\n'.join(unique_lines)

-dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype).to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 2048
-
-@spaces.GPU()
+@GPU(duration=1)
 def generate_model_response(model, inputs):
     try:
         response = model(inputs)
@@ -103,21 +97,6 @@ def generate_model_response(model, inputs):
         print(f"Error generating model response: {e}")
         return ""

-@spaces.GPU()
-def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    image = pipe(
-        prompt=prompt,
-        width=width,
-        height=height,
-        num_inference_steps=num_inference_steps,
-        generator=generator,
-        guidance_scale=0.0
-    ).images[0]
-    return image, seed
-
 def remove_repetitive_responses(responses):
     unique_responses = {}
     for response in responses:
@@ -145,88 +124,15 @@ async def process_message(message):
     """
     return formatted_response, curl_command

-examples = [
-    "a tiny astronaut hatching from an egg on the moon",
-    "a cat holding a sign that says hello world",
-    "an anime illustration of a wiener schnitzel",
-]
-
-css="""
-#col-container {
-    margin: 0 auto;
-    max-width: 520px;
-}
-"""
-
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""# FLUX.1 [schnell]
-        12B param rectified flow transformer distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/) for 4 step generation
-        [[blog](https://blackforestlabs.ai/announcing-black-forest-labs/)] [[model](https://huggingface.co/black-forest-labs/FLUX.1-schnell)]
-        """)
-
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0)
-
-        result = gr.Image(label="Result", show_label=False)
-
-        with gr.Accordion("Advanced Settings", open=False):
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,
-                )
-
-            with gr.Row():
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=4,
-                )
-
-        gr.Examples(
-            examples=examples,
-            fn=infer,
-            inputs=[prompt],
-            outputs=[result, seed],
-            cache_examples="lazy"
-        )
-
-        gr.on(
-            triggers=[run_button.click, prompt.submit],
-            fn=infer,
-            inputs=[prompt, seed, randomize_seed, width, height, num_inference_steps],
-            outputs=[result, seed]
-        )
+
+iface = gr.Interface(
+    fn=process_message,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
+    outputs=[gr.Markdown(), gr.Textbox(label="cURL command")],
+    title="Multi-Model LLM API",
+    description="Enter a message and get responses from multiple LLMs.",
+)

 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
-    demo.launch(server_port=port)
+    iface.launch(server_port=port)
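After this change, app.py serves process_message through a plain gr.Interface instead of the removed FLUX image demo. Below is a minimal client-side sketch for querying the running app with gradio_client; the local URL and the default "/predict" endpoint name are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch: call the updated interface once app.py is running.
# Assumes a local launch on port 7860 and Gradio's default "/predict" endpoint.
from gradio_client import Client

client = Client("http://127.0.0.1:7860")
formatted_response, curl_command = client.predict(
    "Enter your message here...",  # the message passed to process_message
    api_name="/predict",
)
print(formatted_response)  # Markdown-formatted responses from the configured models
print(curl_command)        # the generated cURL command shown in the second output box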