import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
import torch
model_id = "truongghieu/deci-finetuned_Prj2"

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 4-bit quantization settings (used only when running on a GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Load the 4-bit quantized model on GPU; otherwise fall back to the
# full-precision model on CPU (4-bit bitsandbytes loading requires CUDA).
if device.type == "cuda":
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True, quantization_config=bnb_config
    )
else:
    model = AutoModelForCausalLM.from_pretrained(
        "truongghieu/deci-finetuned", trust_remote_code=True
    ).to(device)
# Decoding settings for generated replies
generation_config = GenerationConfig(
    penalty_alpha=0.6,
    do_sample=True,
    top_k=3,
    temperature=0.5,
    repetition_penalty=1.2,
    max_new_tokens=50,
    pad_token_id=tokenizer.eos_token_id,
)
# Generate a text output for a text input; the decoded output
# also contains the prompt, since we decode the full sequence.
def generate_text(text):
    input_text = f'###Human: "{text}"'
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    output_ids = model.generate(input_ids, generation_config=generation_config)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output_text
iface = gr.Interface(fn=generate_text, inputs="text", outputs="text")
iface.launch()
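
# A minimal sketch of querying the running app from another process with the
# gradio_client package; the local URL and the example question are assumptions,
# while api_name="/predict" is the default endpoint for a single-function Interface.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# answer = client.predict("What is this project about?", api_name="/predict")
# print(answer)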