import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "edumunozsala/llama-2-7b-int4-python-code-20k"

# Load the tokenizer and the 4-bit quantized model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

instruction = "Write a Python function to display the first and last elements of a list."
input = ""  # optional input/context for the task; empty here

prompt = f"""### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the Task.

### Task:
{instruction}

### Input:
{input}

### Response:
"""

input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()

with torch.inference_mode():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=100,
        do_sample=True,
        top_p=0.9,
        temperature=0.5,
    )

print(f"Prompt:\n{prompt}\n")
# Strip the prompt from the decoded output so only the generated code is shown
print(f"Generated response:\n{tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]}")
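
# Optional: a minimal sketch that wraps prompt construction, generation, and decoding
# in a reusable helper, assuming the `model` and `tokenizer` loaded above. The function
# name `generate_code` and its parameters are illustrative, not part of the model card.
def generate_code(instruction: str, context: str = "", max_new_tokens: int = 100) -> str:
    prompt = f"""### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the Task.

### Task:
{instruction}

### Input:
{context}

### Response:
"""
    # Tokenize the prompt and place it on the same device as the model
    input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=0.9,
            temperature=0.5,
        )
    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Return only the text generated after the prompt
    return decoded[len(prompt):]

# Example usage (hypothetical task):
# print(generate_code("Write a Python function to reverse a string."))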