DeepMount00 committed on
Commit
c1e9709
1 Parent(s): b2de139

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -127
app.py CHANGED
@@ -14,136 +14,168 @@ subprocess.run(
14
  shell=True,
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
# HTML banner rendered at the top of the Space via gr.Markdown.
# NOTE(review): raw HTML, not Markdown — relies on Gradio allowing inline HTML.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Lexora-Lite-3B</h1>
<p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/DeepMount00/Lexora-Lite-3B"><b>Lexora-Lite-3B Chat ITA</b></a>.</p>
</div>
<div>
<p>This model, <strong>DeepMount00/Lexora-Lite-3B</strong>, is currently the best open-source large language model for the Italian language. You can view its ranking and compare it with other models on the leaderboard at <a href="https://huggingface.co/spaces/FinancialSupport/open_ita_llm_leaderboard"><b>this site</b></a>.</p>
</div>
'''
26
# Generation limits: hard cap for the UI slider, its default, and the maximum
# prompt length in tokens (overridable through the environment).
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Prefer the first CUDA device, fall back to CPU.
# NOTE(review): `device` is not used in the visible code — placement is done
# by device_map="auto" below; confirm before removing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_id = "DeepMount00/Lexora-Lite-3B"
# trust_remote_code=True executes code shipped in the model repo — acceptable
# here only because the repo belongs to the Space author.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",            # let accelerate place the weights
    torch_dtype=torch.bfloat16,   # half precision to fit the 3B model on GPU
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
)
model.eval()  # inference only: disables dropout and training-mode behavior
42
-
43
-
44
@spaces.GPU(duration=90)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_message: str = "",
    max_new_tokens: int = 2048,
    temperature: float = 0.0001,
    top_p: float = 1.0,
    top_k: int = 50,
    repetition_penalty: float = 1.0,
) -> Iterator[str]:
    """Stream a chat completion for *message* given prior *chat_history*.

    chat_history is a list of (user, assistant) string pairs.  Yields the
    accumulated response text after every new chunk so the UI renders a
    live stream.  The prompt is trimmed to MAX_INPUT_TOKEN_LENGTH tokens.
    """
    # Rebuild the whole conversation in chat-template message form.
    conversation = [{"role": "system", "content": system_message}]
    for user, assistant in chat_history:
        conversation.extend(
            [
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            ]
        )
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    # Keep only the most recent tokens when the conversation exceeds the budget.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    # BUGFIX: transformers rejects do_sample=True with a non-positive
    # temperature, and the UI's Temperature slider allows 0 — that combination
    # used to make model.generate fail.  Fall back to greedy decoding instead.
    do_sample = temperature > 0
    generate_kwargs = dict(
        {"input_ids": input_ids},
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    if do_sample:
        generate_kwargs["temperature"] = temperature
    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
90
-
91
-
92
# Components shown under "Additional inputs" in the chat UI.
# render=False on the system box defers rendering to the ChatInterface.
_system_box = gr.Textbox(value="", label="System message", render=False)
_max_tokens = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=MAX_MAX_NEW_TOKENS,
    step=1,
    value=DEFAULT_MAX_NEW_TOKENS,
)
_temperature = gr.Slider(label="Temperature", minimum=0, maximum=4.0, step=0.1, value=0.001)
_top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=1.0)
_top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
_rep_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.0)

# Chat widget wired to generate(); extra inputs are passed positionally after
# (message, history), matching generate's signature order.
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        _system_box,
        _max_tokens,
        _temperature,
        _top_p,
        _top_k,
        _rep_penalty,
    ],
    stop_btn=None,
    examples=[
        ["Ciao! Come stai?"],
    ],
    cache_examples=False,
)
142
 
143
# Page layout: description banner, duplicate button, then the chat widget.
with gr.Blocks(theme="soft", css="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
if __name__ == "__main__":
    # Cap the request queue at 20 pending jobs, then start the server.
    app = demo.queue(max_size=20)
    app.launch()
 
14
  shell=True,
15
  )
16
 
17
# Custom stylesheet injected via gr.Blocks(css=...).  Class names here must
# match the elem_classes used when building components below.
CUSTOM_CSS = """
.container {
    max-width: 1000px !important;
    margin: auto !important;
    padding-top: 2rem !important;
}

.header-container {
    background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%);
    padding: 2rem;
    border-radius: 1rem;
    margin-bottom: 2rem;
    color: white;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
}

.model-info {
    background: white;
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-top: 1rem;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.chat-container {
    border: 1px solid #e5e7eb;
    border-radius: 1rem;
    background: white;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}

.message {
    padding: 1rem;
    margin: 0.5rem;
    border-radius: 0.5rem;
}

.user-message {
    background: #f3f4f6;
}

.assistant-message {
    background: #dbeafe;
}

.controls-container {
    background: #f8fafc;
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-top: 1rem;
}

.slider-label {
    font-weight: 600;
    color: #374151;
}

.duplicate-button {
    background: #2563eb !important;
    color: white !important;
    padding: 0.75rem 1.5rem !important;
    border-radius: 0.5rem !important;
    font-weight: 600 !important;
    transition: all 0.2s !important;
}

.duplicate-button:hover {
    background: #1d4ed8 !important;
    transform: translateY(-1px) !important;
}
"""
88
+
89
# HTML banner rendered at the top of the Space via gr.Markdown; the
# header-container / model-info classes are styled by CUSTOM_CSS.
DESCRIPTION = '''
<div class="header-container">
<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; text-align: center;">Lexora-Lite-3B</h1>
<div class="model-info">
<h2 style="font-size: 1.5rem; font-weight: 600; color: #1e3a8a; margin-bottom: 1rem;">About the Model</h2>
<p style="color: #374151; line-height: 1.6;">
This Space demonstrates <a href="https://huggingface.co/DeepMount00/Lexora-Lite-3B" style="color: #2563eb; font-weight: 600;">Lexora-Lite-3B Chat ITA</a>,
currently the best open-source large language model for the Italian language. Compare its performance with other models on the
<a href="https://huggingface.co/spaces/FinancialSupport/open_ita_llm_leaderboard" style="color: #2563eb; font-weight: 600;">official leaderboard</a>.
</p>
</div>
</div>
'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
# UI layout: themed Blocks page with the description banner, the chat widget,
# and a duplicate button, all styled through CUSTOM_CSS elem_classes.
with gr.Blocks(css=CUSTOM_CSS, theme=gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    radius_size=gr.themes.sizes.radius_sm,
)) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown(DESCRIPTION)

        with gr.Column(elem_classes="chat-container"):
            # BUGFIX: the committed version wrapped the sliders in
            # `gr.Column(...) as controls:` plus a nested `with controls:`
            # *inside this list literal*, which is a SyntaxError (`as` binding
            # is only legal in with/except/import statements).
            # additional_inputs must be a flat list of components, so the
            # sliders are listed directly; grouping/styling stays available
            # through their elem_classes.
            chat_interface = gr.ChatInterface(
                fn=generate,
                additional_inputs=[
                    gr.Textbox(
                        value="",
                        label="System Message",
                        elem_classes="system-message",
                        render=False,
                    ),
                    gr.Slider(
                        label="Maximum New Tokens",
                        minimum=1,
                        maximum=MAX_MAX_NEW_TOKENS,
                        step=1,
                        value=DEFAULT_MAX_NEW_TOKENS,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Temperature",
                        minimum=0,
                        maximum=4.0,
                        step=0.1,
                        value=0.001,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Top-p (Nucleus Sampling)",
                        minimum=0.05,
                        maximum=1.0,
                        step=0.05,
                        value=1.0,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Top-k",
                        minimum=1,
                        maximum=1000,
                        step=1,
                        value=50,
                        elem_classes="slider-label",
                    ),
                    gr.Slider(
                        label="Repetition Penalty",
                        minimum=1.0,
                        maximum=2.0,
                        step=0.05,
                        value=1.0,
                        elem_classes="slider-label",
                    ),
                ],
                examples=[
                    ["Ciao! Come stai?"],
                ],
                cache_examples=False,
            )

        gr.DuplicateButton(
            value="Duplicate Space for Private Use",
            elem_classes="duplicate-button",
            elem_id="duplicate-button",
        )
179
 
180
if __name__ == "__main__":
    # Cap the request queue at 20 pending jobs, then start the server.
    demo.queue(max_size=20).launch()