Update app.py
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import spaces
 import matplotlib.pyplot as plt
 import numpy as np
 from huggingface_hub import login
@@ -25,42 +24,39 @@ tokenizer = None
 def load_model(model_name):
     global model, tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="
+    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

     # Set the padding token if it does not exist
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
         model.config.pad_token_id = model.config.eos_token_id

-    return f"Model {model_name} loaded successfully on
+    return f"Model {model_name} loaded successfully on GPU."

-@spaces.GPU(duration=300)
 def generate_text(input_text, temperature, top_p, top_k):
     global model, tokenizer

-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
-    input_ids = inputs["input_ids"]
-    attention_mask = inputs["attention_mask"]
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)

     with torch.no_grad():
         outputs = model.generate(
-
-            attention_mask=attention_mask,
+            **inputs,
             max_new_tokens=50,
             temperature=temperature,
             top_p=top_p,
             top_k=top_k,
-            output_attentions=
+            output_attentions=True,
+            output_scores=True,  # needed so outputs.scores below is not None
             return_dict_in_generate=True
         )

     generated_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)

     # Get the logits for the last generated token
-    last_token_logits =
+    last_token_logits = outputs.scores[-1][0]

     # Apply softmax to obtain the probabilities
-    probabilities = torch.nn.functional.softmax(last_token_logits
+    probabilities = torch.nn.functional.softmax(last_token_logits, dim=-1)

     # Get the top 5 most probable tokens
     top_k = 5
@@ -70,16 +66,18 @@ def generate_text(input_text, temperature, top_p, top_k):
     # Prepare the data for the probability plot
     prob_data = {word: prob.item() for word, prob in zip(top_words, top_probs)}

-    #
+    # Extract the attention weights
+    attentions = torch.cat([att[-1].mean(dim=1) for att in outputs.attentions], dim=0).cpu().numpy()
+
     attention_data = {
-        'attention':
-        'tokens': tokenizer.convert_ids_to_tokens(
+        'attention': attentions,
+        'tokens': tokenizer.convert_ids_to_tokens(outputs.sequences[0])
     }

     return generated_text, plot_attention(attention_data), plot_probabilities(prob_data)

 def plot_attention(attention_data):
-    attention =
+    attention = attention_data['attention']
     tokens = attention_data['tokens']

     fig, ax = plt.subplots(figsize=(10, 10))
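The lines between `top_k = 5` and the `prob_data` comprehension fall outside the hunk context, so the derivation of `top_words` and `top_probs` is not shown. A minimal sketch of how they are typically computed from the `probabilities` tensor above; `torch.topk` returns the largest values and their indices:

# Hypothetical reconstruction of the truncated top-k block;
# `probabilities` is the softmax distribution computed above.
top_probs, top_indices = torch.topk(probabilities, k=5)
top_words = tokenizer.convert_ids_to_tokens(top_indices.tolist())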
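For reference on the shapes behind the new attention-extraction line: with `output_attentions=True` and `return_dict_in_generate=True`, `outputs.attentions` is a tuple with one entry per generation step, each entry itself a tuple of per-layer tensors of shape (batch, num_heads, query_len, key_len), where key_len grows at each step when the KV cache is used. The `torch.cat` in the commit therefore assumes matching key lengths; a padded variant, offered only as a sketch under those shape assumptions:

import torch

def stack_step_attentions(step_attentions):
    # Average the last layer's attention over heads for each generation step,
    # pad each row block to a common key length, then stack into one matrix.
    rows = [att[-1].mean(dim=1)[0] for att in step_attentions]  # (query_len, key_len)
    max_len = max(r.shape[-1] for r in rows)
    padded = [torch.nn.functional.pad(r, (0, max_len - r.shape[-1])) for r in rows]
    return torch.cat(padded, dim=0).cpu().numpy()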
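The rest of app.py, including the Gradio wiring, sits outside this diff. Purely as an illustration of how the functions above might be hooked up, here is a minimal Blocks sketch; the component labels, slider ranges, and default model name are assumptions, not taken from the commit:

import gradio as gr  # hypothetical wiring; this part of app.py is not shown

with gr.Blocks() as demo:
    model_name = gr.Textbox(label="Model name", value="gpt2")  # assumed default
    status = gr.Textbox(label="Status")
    gr.Button("Load model").click(load_model, inputs=model_name, outputs=status)

    prompt = gr.Textbox(label="Prompt")
    temperature = gr.Slider(0.1, 2.0, value=1.0, label="Temperature")
    top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
    top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
    text_out = gr.Textbox(label="Generated text")
    attn_plot = gr.Plot(label="Attention")
    prob_plot = gr.Plot(label="Token probabilities")
    gr.Button("Generate").click(
        generate_text,
        inputs=[prompt, temperature, top_p, top_k],
        outputs=[text_out, attn_plot, prob_plot],
    )

demo.launch()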