DeepMount00 committed on
Commit
c1e9709
1 Parent(s): b2de139

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -127
app.py CHANGED
@@ -14,136 +14,168 @@ subprocess.run(
14
  shell=True,
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
# HTML banner rendered at the top of the Space via gr.Markdown.
# NOTE(review): raw HTML, not Markdown — relies on Gradio allowing inline HTML.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Lexora-Lite-3B</h1>
<p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/DeepMount00/Lexora-Lite-3B"><b>Lexora-Lite-3B Chat ITA</b></a>.</p>
</div>
<div>
<p>This model, <strong>DeepMount00/Lexora-Lite-3B</strong>, is currently the best open-source large language model for the Italian language. You can view its ranking and compare it with other models on the leaderboard at <a href="https://huggingface.co/spaces/FinancialSupport/open_ita_llm_leaderboard"><b>this site</b></a>.</p>
</div>
'''
26
# Generation limits: hard cap for the UI slider, its default, and the maximum
# prompt length in tokens (overridable through the environment).
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Prefer the first CUDA device, fall back to CPU.
# NOTE(review): `device` is not used in the visible code — placement is done
# by device_map="auto" below; confirm before removing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "DeepMount00/Lexora-Lite-3B"
# trust_remote_code=True executes code shipped in the model repo — acceptable
# here only because the repo belongs to the Space author.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",            # let accelerate place the weights
    torch_dtype=torch.bfloat16,   # half precision to fit the 3B model on GPU
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
)
model.eval()  # inference only: disables dropout and training-mode behavior
42
-
43
-
44
@spaces.GPU(duration=90)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_message: str = "",
    max_new_tokens: int = 2048,
    temperature: float = 0.0001,
    top_p: float = 1.0,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream a chat completion for *message* given prior *chat_history*.

    chat_history is a list of (user, assistant) string pairs.  Yields the
    accumulated response text after every new chunk so the UI renders a
    live stream.  The prompt is trimmed to MAX_INPUT_TOKEN_LENGTH tokens.
    """
    # Rebuild the whole conversation in chat-template message form.
    conversation = [{"role": "system", "content": system_message}]
    for user, assistant in chat_history:
        conversation.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    # Keep only the most recent tokens when the conversation exceeds the budget.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    # BUGFIX: transformers rejects do_sample=True with a non-positive
    # temperature, and the UI's Temperature slider allows 0 — that combination
    # used to make model.generate fail.  Fall back to greedy decoding instead.
    do_sample = temperature > 0
    generate_kwargs = dict(
        {"input_ids": input_ids},
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    if do_sample:
        generate_kwargs["temperature"] = temperature
    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
90
-
91
-
92
# Components shown under "Additional inputs" in the chat UI.
# render=False on the system box defers rendering to the ChatInterface.
_system_box = gr.Textbox(value="", label="System message", render=False)
_max_tokens = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=MAX_MAX_NEW_TOKENS,
    step=1,
    value=DEFAULT_MAX_NEW_TOKENS,
)
_temperature = gr.Slider(label="Temperature", minimum=0, maximum=4.0, step=0.1, value=0.001)
_top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=1.0)
_top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
_rep_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)

# Chat widget wired to generate(); extra inputs are passed positionally after
# (message, history), matching generate's signature order.
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        _system_box,
        _max_tokens,
        _temperature,
        _top_p,
        _top_k,
        _rep_penalty,
    ],
    stop_btn=None,
    examples=[
        ["Ciao! Come stai?"],
    ],
    cache_examples=False,
)
142
 
143
# Page layout: description banner, duplicate button, then the chat widget.
with gr.Blocks(theme="soft", css="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
if __name__ == "__main__":
    # Cap the request queue at 20 pending jobs, then start the server.
    app = demo.queue(max_size=20)
    app.launch()
 
14
  shell=True,
15
  )
16
 
17
# Custom stylesheet injected via gr.Blocks(css=...).  Class names here must
# match the elem_classes used when building components below.
CUSTOM_CSS = """
.container {
    max-width: 1000px !important;
    margin: auto !important;
    padding-top: 2rem !important;
}

.header-container {
    background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%);
    padding: 2rem;
    border-radius: 1rem;
    margin-bottom: 2rem;
    color: white;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
}

.model-info {
    background: white;
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-top: 1rem;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.chat-container {
    border: 1px solid #e5e7eb;
    border-radius: 1rem;
    background: white;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}

.message {
    padding: 1rem;
    margin: 0.5rem;
    border-radius: 0.5rem;
}

.user-message {
    background: #f3f4f6;
}

.assistant-message {
    background: #dbeafe;
}

.controls-container {
    background: #f8fafc;
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-top: 1rem;
}

.slider-label {
    font-weight: 600;
    color: #374151;
}

.duplicate-button {
    background: #2563eb !important;
    color: white !important;
    padding: 0.75rem 1.5rem !important;
    border-radius: 0.5rem !important;
    font-weight: 600 !important;
    transition: all 0.2s !important;
}

.duplicate-button:hover {
    background: #1d4ed8 !important;
    transform: translateY(-1px) !important;
}
"""
88
+
89
# HTML banner rendered at the top of the Space via gr.Markdown; the
# header-container / model-info classes are styled by CUSTOM_CSS.
DESCRIPTION = '''
<div class="header-container">
<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; text-align: center;">Lexora-Lite-3B</h1>
<div class="model-info">
<h2 style="font-size: 1.5rem; font-weight: 600; color: #1e3a8a; margin-bottom: 1rem;">About the Model</h2>
<p style="color: #374151; line-height: 1.6;">
This Space demonstrates <a href="https://huggingface.co/DeepMount00/Lexora-Lite-3B" style="color: #2563eb; font-weight: 600;">Lexora-Lite-3B Chat ITA</a>,
currently the best open-source large language model for the Italian language. Compare its performance with other models on the
<a href="https://huggingface.co/spaces/FinancialSupport/open_ita_llm_leaderboard" style="color: #2563eb; font-weight: 600;">official leaderboard</a>.
</p>
</div>
</div>
'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
# UI layout: themed Blocks page with the description banner, the chat widget,
# and a duplicate button, all styled through CUSTOM_CSS elem_classes.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
)) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown(DESCRIPTION)

        with gr.Column(elem_classes="chat-container"):
            # BUGFIX: the committed version wrapped the sliders in
            # `gr.Column(...) as controls:` plus a nested `with controls:`
            # *inside this list literal*, which is a SyntaxError (`as` binding
            # is only legal in with/except/import statements).
            # additional_inputs must be a flat list of components, so the
            # sliders are listed directly; grouping/styling stays available
            # through their elem_classes.
            chat_interface = gr.ChatInterface(
                fn=generate,
                additional_inputs=[
                    gr.Textbox(
                        value="",
                        label="System Message",
                        elem_classes="system-message",
                        render=False,
                    ),
                    gr.Slider(
                        label="Maximum New Tokens",
                        minimum=1,
                        maximum=MAX_MAX_NEW_TOKENS,
                        step=1,
                        value=DEFAULT_MAX_NEW_TOKENS,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Temperature",
                        minimum=0,
                        maximum=4.0,
                        step=0.1,
                        value=0.001,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Top-p (Nucleus Sampling)",
                        minimum=0.05,
                        maximum=1.0,
                        step=0.05,
                        value=1.0,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Top-k",
                        minimum=1,
                        maximum=1000,
                        step=1,
                        value=50,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Repetition Penalty",
                        minimum=1.0,
                        maximum=2.0,
                        step=0.05,
                        value=1.0,
                        elem_classes="slider-label",
                    ),
                ],
                examples=[
                    ["Ciao! Come stai?"],
                ],
                cache_examples=False,
            )

        gr.DuplicateButton(
            value="Duplicate Space for Private Use",
            elem_classes="duplicate-button",
            elem_id="duplicate-button",
        )
179
 
180
if __name__ == "__main__":
    # Cap the request queue at 20 pending jobs, then start the server.
    demo.queue(max_size=20).launch()