Update app.py
Upgrade inference.
app.py
CHANGED
@@ -1,10 +1,9 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-
+# Initialize the inference client
 client = InferenceClient("Qwen/QwQ-32B-Preview")
 
-
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -13,46 +12,80 @@ def respond(
     temperature,
     top_p,
 ):
+    # Build the message context
     messages = [{"role": "system", "content": system_message}]
-
-    for …
-        if …
-            messages.append({"role": "user", "content": …
-        if …
-            messages.append({"role": "assistant", "content": …
-
+
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
+
     messages.append({"role": "user", "content": message})
-
+
     response = ""
-
-    …
+
+    # Call the model with streaming disabled to capture the whole response
+    full_response = client.chat_completion(
         messages,
         max_tokens=max_tokens,
-        stream=…
+        stream=False,  # Disable streaming to capture the whole response at once
         temperature=temperature,
         top_p=top_p,
-    )
-    …
+    )
+
+    # Extract the content of the response
+    if full_response and full_response.choices:
+        response = full_response.choices[0].message['content']
+
+    return response
 
-    …
-    …
+# Define the additional controls with clearer labels and appropriate ranges
+system_message_input = gr.Textbox(
+    value="Responde siempre en español, y el código siempre completo",
+    label="Mensaje del Sistema",
+    lines=2,
+    placeholder="Instrucciones para el modelo...",
+)
 
+max_tokens_slider = gr.Slider(
+    minimum=512,
+    maximum=8192,  # Raise the maximum if the model supports it
+    step=1,
+    value=2048,  # Raise the default value for longer responses
+    label="Máximo de Tokens Nuevos",
+)
+
+temperature_slider = gr.Slider(
+    minimum=0.1,
+    maximum=2.0,  # Adjust the range for finer control
+    step=0.1,
+    value=0.7,
+    label="Temperatura",
+)
+
+top_p_slider = gr.Slider(
+    minimum=0.1,
+    maximum=1.0,
+    step=0.05,
+    value=0.95,
+    label="Top-p (muestreo por núcleo)",
+)
+
+# Configure the chat interface with an improved appearance
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
-        …
-        …
-        …
-        …
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
+        system_message_input,
+        max_tokens_slider,
+        temperature_slider,
+        top_p_slider,
     ],
+    title="Interfaz de Chat con QwQ-32B-Preview",
+    description="Interactúa con el modelo QwQ-32B-Preview de Hugging Face. Ajusta los parámetros para personalizar las respuestas.",
+    theme="default",  # You can switch to other themes like "huggingface", "dark", etc.
 )
 
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(share=True)  # `share=True` makes the interface publicly shareable
+
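The commit replaces the old token-streaming loop with a single blocking `chat_completion` call that returns the whole completion before anything is shown. If live token-by-token output is wanted again, below is a minimal sketch of a streaming handler, assuming the same model and the standard `huggingface_hub` streaming interface; `respond_streaming` is an illustrative name, not part of this commit:

import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("Qwen/QwQ-32B-Preview")

def respond_streaming(message, history, system_message, max_tokens, temperature, top_p):
    # Same message context as the committed respond()
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # With stream=True the client yields chunks as tokens arrive;
    # yielding the growing string lets gr.ChatInterface render it live.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk may carry an empty delta
            response += token
            yield response

Two small notes on the committed version: attribute access (`full_response.choices[0].message.content`) is the style the huggingface_hub docs use, though the subscript form also works because the output types remain dict-compatible; and on a Hugging Face Space the app is already served publicly, so `share=True` in `demo.launch()` generally has no effect there.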