Ernestasio committed
Commit e6ec663 · 1 Parent(s): 08c0b00

Update app.py

Files changed (1)
  1. app.py +29 -21
app.py CHANGED
@@ -1,30 +1,38 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import gradio as gr
-
- # Specify the device the model will be loaded on (here, "cuda" for GPU)
- device = "cuda"
-
- model_name = 'mistralai/Mistral-7B-Instruct-v0.1'
-
- # Load the pretrained model and the associated tokenizer
- model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True)
-
- # Load the associated tokenizer
- tokenizer = AutoTokenizer.from_pretrained(model_name)
-
- def speak(prompt):
-     # Tokenize the prompt, convert it to PyTorch tensors, and move them to the chosen device
-     model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
-     model.to(device)
-
-     # Generate text conditioned on the prompt with the model
-     generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
-
-     # Decode the generated ids back into text and return the result
-     resulting_text = tokenizer.batch_decode(generated_ids)[0]
-     return resulting_text
-
- iface = gr.Interface(fn=speak, inputs="text", outputs="text")
- iface.launch()
+ import ctranslate2
+ import transformers
+ from huggingface_hub import snapshot_download
+
+ # Download the int8 CTranslate2 conversion of Mistral-7B-Instruct and build a generator
+ model_dir = snapshot_download(repo_id="Praise2112/Mistral-7B-Instruct-v0.1-int8-ct2")
+ generator = ctranslate2.Generator(model_dir, device="cuda", compute_type="int8")  # GPU
+ # generator = ctranslate2.Generator(model_dir, device="cpu", compute_type="int8")  # CPU
+ tokenizer = transformers.AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+
+ messages = [
+     {"role": "user", "content": "What is your favourite condiment?"},
+     {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
+     {"role": "user", "content": "Do you have mayonnaise recipes?"}
+ ]
+
+ # Apply the chat template, then convert the token ids to token strings,
+ # since generate_batch expects tokens rather than ids
+ model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
+ model_inputs = [tokenizer.convert_ids_to_tokens(model_input) for model_input in model_inputs]
+ generated_ids = generator.generate_batch(model_inputs, max_length=1000, sampling_topk=10)
+ decoded = [res.sequences_ids[0] for res in generated_ids]
+ decoded = tokenizer.batch_decode(decoded)
+ print(decoded[0])
+
+ # def speak(prompt):
+ #     # Tokenize the prompt, convert it to PyTorch tensors, and move them to the chosen device
+ #     model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
+ #     model.to(device)
+
+ #     # Generate text conditioned on the prompt with the model
+ #     generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
+
+ #     # Decode the generated ids back into text and return the result
+ #     resulting_text = tokenizer.batch_decode(generated_ids)[0]
+ #     return resulting_text
+
+ # iface = gr.Interface(fn=speak, inputs="text", outputs="text")
+ # iface.launch()
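
For reference, a minimal sketch of how the commented-out Gradio interface could be wired back to the new CTranslate2 generator. This is not part of the commit: the speak name and the text-in/text-out signature are carried over from the removed code, and the single-turn chat-template wrapping is an assumption.

import ctranslate2
import gradio as gr
import transformers
from huggingface_hub import snapshot_download

# Same int8 conversion used in the commit (assumption: GPU available)
model_dir = snapshot_download(repo_id="Praise2112/Mistral-7B-Instruct-v0.1-int8-ct2")
generator = ctranslate2.Generator(model_dir, device="cuda", compute_type="int8")
tokenizer = transformers.AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

def speak(prompt):
    # Wrap the raw prompt as a single-turn chat, then hand CTranslate2 token strings
    ids = tokenizer.apply_chat_template([{"role": "user", "content": prompt}])
    tokens = tokenizer.convert_ids_to_tokens(ids)
    results = generator.generate_batch([tokens], max_length=1000, sampling_topk=10)
    return tokenizer.decode(results[0].sequences_ids[0])

iface = gr.Interface(fn=speak, inputs="text", outputs="text")
iface.launch()

The wrapper mirrors the removed transformers-based speak function, swapping model.generate for generator.generate_batch and the ids-to-tokens conversion the new app.py already performs.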