Spaces:
DeepMount00
committed on
Commit • 6b8d341
1 Parent(s): 18f610c
Update app.py
app.py CHANGED
@@ -34,20 +34,24 @@ h1 {
 
 @spaces.GPU(duration=120)
 def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
-    #
-    conversation = []
+    # Initialize the conversation with a system prompt
+    conversation = [{"role": "system", "content": "Sei un assistente specializzato nella lingua italiana. Rispondi in modo preciso e dettagliato."}]
+
+    # Add historical conversation
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+
+    # Add the current user message to the conversation
     conversation.append({"role": "user", "content": message})
 
-    #
+    # Prepare the input for the model
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
 
-    #
-    do_sample = True if temperature > 0 else False #
-    real_temperature = max(temperature, 0.001) #
+    # Parameters for generating text
+    do_sample = True if temperature > 0 else False # Use sampling unless temperature is 0
+    real_temperature = max(temperature, 0.001) # Avoid zero temperature which disables sampling
 
-    #
+    # Generate a response from the model
     generated_ids = model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
@@ -56,7 +60,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
         eos_token_id=tokenizer.eos_token_id
     )
 
-    #
+    # Decode the generated tokens
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     prompt_end_index = decoded[0].find(message) + len(message)
     final_response = decoded[0][prompt_end_index:] if prompt_end_index != -1 else decoded[0]
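The new system prompt is Italian for "You are an assistant specialized in the Italian language. Respond in a precise and detailed manner."

For context, here is a minimal sketch of how this function is typically wired into the rest of a Space's app.py. The diff does not show the surrounding file, so the checkpoint name, the model/tokenizer loading, and the Gradio ChatInterface below are assumptions, not the Space's actual code:

# Hypothetical surrounding context for chat_llama3_8b (not part of this commit).
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"  # placeholder; the Space's real checkpoint is not shown in the diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")

# ... chat_llama3_8b as defined in the diff above would sit here ...

demo = gr.ChatInterface(
    fn=chat_llama3_8b,  # Gradio passes (message, history, *additional_inputs)
    additional_inputs=[
        gr.Slider(minimum=0.0, maximum=1.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()

One caveat about the unchanged trimming logic at the end of the function: str.find returns -1 when message is not found in the decoded output, so prompt_end_index becomes len(message) - 1 and the "!= -1" guard never triggers. A more robust alternative (a sketch, not what the commit does) is to slice off the prompt by token count instead of searching the decoded string:

response_ids = generated_ids[0][input_ids.shape[-1]:]  # keep only tokens generated after the prompt
final_response = tokenizer.decode(response_ids, skip_special_tokens=True)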