# Spaces: Runtime error
import gradio as gr
import torch
from peft import prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub repository id used for both the tokenizer and the model weights.
model_path = "inception-mbzuai/jais-13b-chat"

# Prompt templates; ``{Question}`` is replaced with the user's message.
prompt_eng = "### Instruction: \n\nComplete the conversation below between [|Human|] and [|AI|]:\n### Input: [|Human|] {Question}\n### Response: [|AI|]"
# The Arabic instruction text had been corrupted by a wrong-charset round trip
# (UTF-8 bytes shown as Greek letters); it is restored to real Arabic here.
prompt_ar = "### Instruction: \n\nأكمل المحادثة أدناه بين [|Human|] و [|AI|]:\n### Input: [|Human|] {Question}\n### Response: [|AI|]"

# Device the encoded prompt is moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Earlier loading variants, kept for reference:
#   AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)
#   AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True)
# Current variant: 8-bit quantized weights with automatic device placement.
# trust_remote_code=True runs modelling code shipped in the hub repository.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_8bit=True,
    device_map="auto",
    trust_remote_code=True,
)
# This script only generates text, so the model is left in inference mode
# instead of being passed through peft's prepare_model_for_kbit_training,
# which is a fine-tuning preparation step.
model.eval()
def get_response(text, tokenizer=tokenizer, model=model, max_context=2048):
    """Generate a sampled completion for an already-formatted prompt.

    Args:
        text: The full prompt string, with the question already substituted
            into a prompt template.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Causal language model whose ``generate`` method is called.
        max_context: Upper bound on the total sequence length (prompt tokens
            plus generated tokens). Defaults to 2048, the constant the
            original call used -- presumably the model's context window;
            confirm against the model card.

    Returns:
        list[str]: The first decoded sequence split on the
        ``"### Response: [|AI|]"`` marker. When the marker is present in the
        decoded text, the part after it is the last element of the list.

    Raises:
        ValueError: If the prompt is too long for at least four new tokens to
            fit within ``max_context``.
    """
    input_ids = tokenizer(text, return_tensors="pt").input_ids
    inputs = input_ids.to(device)
    input_len = inputs.shape[-1]

    # ``min_length`` and ``max_length`` both count the prompt tokens, so the
    # minimum is "prompt + 4 new tokens" and the maximum is the whole budget.
    min_length = input_len + 4
    if min_length > max_context:
        raise ValueError(
            f"Prompt is {input_len} tokens long; it does not leave room for "
            f"a response within max_context={max_context}."
        )

    generate_ids = model.generate(
        inputs,
        top_p=0.9,
        temperature=0.3,
        # Total-length cap. The previous ``2048 - input_len`` subtracted the
        # prompt length from a limit that already includes the prompt, which
        # made long prompts end up with max_length < min_length.
        max_length=max_context,
        min_length=min_length,
        repetition_penalty=1.2,
        do_sample=True,
    )
    response = tokenizer.batch_decode(
        generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )[0]
    # The decoded text is split on the response marker and returned as a list.
    return response.split("### Response: [|AI|]")
def greet(ques=None):
    """Answer a question using the Arabic prompt template.

    Args:
        ques: The user's question. When left as ``None`` the question is read
            from standard input, so calling ``greet()`` with no arguments
            still works; passing the text directly lets a UI callback supply
            it without touching stdin.

    Returns:
        Whatever ``get_response`` returns for the formatted prompt.
    """
    if ques is None:
        ques = input()
    text = prompt_ar.format_map({'Question': ques})
    return get_response(text)