Spaces:

DeepMount00
/

Lexora-Lite-3B-Chat

Running on Zero

App Files Files Community

DeepMount00 commited on Dec 3, 2024

Commit

9a64687

verified ·

1 Parent(s): f24ac48

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -221

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "DeepMount00/Lexora-Lite-3B"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
@@ -32,137 +32,13 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
-CUSTOM_CSS = """
-.container {
-    max-width: 1000px !important;
-    margin: auto !important;
-}
-.header {
-    text-align: center;
-    margin-bottom: 1rem;
-    padding: 1rem;
-}
-.header h1 {
-    font-size: 2rem;
-    font-weight: 600;
-    color: #1e293b;
-    margin-bottom: 0.5rem;
-}
-.header p {
-    color: #64748b;
-    font-size: 1rem;
-}
-.chat-container {
-    border-radius: 0.75rem;
-    background: white;
-    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
-    height: calc(100vh - 200px);
-    display: flex;
-    flex-direction: column;
-}
-.message-container {
-    padding: 1rem;
-    margin-bottom: 0.5rem;
-}
-.user-message {
-    background: #f8fafc;
-    border-left: 3px solid #2563eb;
-    padding: 1rem;
-    margin: 0.5rem 0;
-    border-radius: 0.5rem;
-}
-.assistant-message {
-    background: white;
-    border-left: 3px solid #64748b;
-    padding: 1rem;
-    margin: 0.5rem 0;
-    border-radius: 0.5rem;
-}
-.controls-panel {
-    position: fixed;
-    right: 1rem;
-    top: 1rem;
-    width: 300px;
-    background: white;
-    padding: 1rem;
-    border-radius: 0.5rem;
-    box-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1);
-    z-index: 1000;
-    display: none;
-}
-.controls-button {
-    position: fixed;
-    right: 1rem;
-    top: 1rem;
-    z-index: 1001;
-    background: #2563eb !important;
-    color: white !important;
-    padding: 0.5rem 1rem !important;
-    border-radius: 0.5rem !important;
-    font-size: 0.875rem !important;
-    font-weight: 500 !important;
-}
-.input-area {
-    border-top: 1px solid #e2e8f0;
-    padding: 1rem;
-    background: white;
-    border-radius: 0 0 0.75rem 0.75rem;
-}
-.textbox {
-    border: 1px solid #e2e8f0 !important;
-    border-radius: 0.5rem !important;
-    padding: 0.75rem !important;
-    font-size: 1rem !important;
-    box-shadow: 0 1px 2px 0 rgb(0 0 0 / 0.05) !important;
-}
-.textbox:focus {
-    border-color: #2563eb !important;
-    outline: none !important;
-    box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.2) !important;
-}
-.submit-button {
-    background: #2563eb !important;
-    color: white !important;
-    padding: 0.5rem 1rem !important;
-    border-radius: 0.5rem !important;
-    font-size: 0.875rem !important;
-    font-weight: 500 !important;
-    transition: all 0.2s !important;
-}
-.submit-button:hover {
-    background: #1d4ed8 !important;
-}
-"""
-DESCRIPTION = '''
-<div class="header">
-    <h1>Lexora-Lite-3B Chat</h1>
-    <p>An advanced Italian language model ready to assist you</p>
-</div>
-'''
-# Generate function remains the same
 @spaces.GPU(duration=90)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_message: str = "",
-    max_new_tokens: int = 2048,
-    temperature: float = 0.0001,
     top_p: float = 1.0,
     top_k: int = 50,
     repetition_penalty: float = 1.0,
@@ -203,102 +79,62 @@ def generate(
         outputs.append(text)
         yield "".join(outputs)
-def create_chat_interface():
-    theme = gr.themes.Soft(
-        primary_hue="blue",
-        secondary_hue="slate",
-        neutral_hue="slate",
-        font=gr.themes.GoogleFont("Inter"),
-        radius_size=gr.themes.sizes.radius_sm,
-    )
-    with gr.Blocks(css=CUSTOM_CSS, theme=theme) as demo:
-        gr.Markdown(DESCRIPTION)
-        with gr.Row():
-            # Main chat column
-            with gr.Column(scale=3):
-                chat = gr.ChatInterface(
-                    fn=generate,
-                    additional_inputs=[
-                        gr.Textbox(
-                            value="",
-                            label="System Message",
-                            visible=False,
-                        ),
-                        gr.Slider(
-                            label="Temperature",
-                            minimum=0,
-                            maximum=1.0,
-                            step=0.1,
-                            value=0.0001,
-                            visible=False,
-                        ),
-                        gr.Slider(
-                            label="Top-p",
-                            minimum=0.05,
-                            maximum=1.0,
-                            step=0.05,
-                            value=1.0,
-                            visible=False,
-                        ),
-                        gr.Slider(
-                            label="Top-k",
-                            minimum=1,
-                            maximum=1000,
-                            step=1,
-                            value=50,
-                            visible=False,
-                        ),
-                        gr.Slider(
-                            label="Repetition Penalty",
-                            minimum=1.0,
-                            maximum=2.0,
-                            step=0.05,
-                            value=1.0,
-                            visible=False,
-                        ),
-                    ],
-                    examples=[
-                        ["Ciao! Come stai?"],
-                        ["Raccontami una breve storia."],
-                        ["Qual è il tuo piatto italiano preferito?"],
-                    ],
-                    cache_examples=False,
-                )
-            # Advanced settings panel
-            with gr.Column(scale=1, visible=False) as settings_panel:
-                gr.Markdown("### Advanced Settings")
-                gr.Slider(
-                    label="Temperature",
-                    minimum=0,
-                    maximum=1.0,
-                    step=0.1,
-                    value=0.0001,
-                )
-                gr.Slider(
-                    label="Top-p",
-                    minimum=0.05,
-                    maximum=1.0,
-                    step=0.05,
-                    value=1.0,
-                )
-                gr.Slider(
-                    label="Top-k",
-                    minimum=1,
-                    maximum=1000,
-                    step=1,
-                    value=50,
-                )
-                gr.Slider(
-                    label="Repetition Penalty",
-                    minimum=1.0,
-                    maximum=2.0,
-                    step=0.05,
-                    value=1.0,
-                )
 if __name__ == "__main__":
-    demo = create_chat_interface()
     demo.queue(max_size=20).launch()

 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+model_id = "DeepMount00/Lexora-Medium-7B"
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 @spaces.GPU(duration=90)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_message: str = "",
+    max_new_tokens: int = 1024,
+    temperature: float = 0.001,
     top_p: float = 1.0,
     top_k: int = 50,
     repetition_penalty: float = 1.0,
         outputs.append(text)
         yield "".join(outputs)
+chat_interface = gr.ChatInterface(
+    fn=generate,
+    additional_inputs=[
+        gr.Textbox(
+            value="",
+            label="System message",
+            render=False,
+        ),
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0,
+            maximum=4.0,
+            step=0.1,
+            value=0.001,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=1.0,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.0,
+        ),
+    ],
+    stop_btn=None,
+    examples=[
+        ["Ciao! Come stai?"],
+    ],
+    cache_examples=False,
+)
+with gr.Blocks(css="style.css", fill_height=True, theme="soft") as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+    chat_interface.render()
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()