### Model Description

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the fine-tuned checkpoint and its tokenizer.
ft = 'gpt-j-onlyk_v2'
tokenizer = AutoTokenizer.from_pretrained(ft)
model = AutoModelForCausalLM.from_pretrained(ft, torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to(device)

# Generation settings. These are built after the tokenizer is loaded because
# the special-token ids are read from it.
gen_kwargs = {
    "max_new_tokens": 100,
    "top_k": 70,
    "top_p": 0.8,
    "do_sample": True,
    "no_repeat_ngram_size": 2,
    "bos_token_id": tokenizer.bos_token_id,
    "eos_token_id": tokenizer.eos_token_id,
    "pad_token_id": tokenizer.pad_token_id,
    "temperature": 0.8,
    "use_cache": True,
    "repetition_penalty": 1.2,
    "num_return_sequences": 1
}

# `inp` is the prompt string to continue; define it before running this part.
prepared = tokenizer.encode(inp, return_tensors='pt').to(model.device)
out = model.generate(input_ids=prepared, **gen_kwargs)

# Only one sequence is requested, so decode the first (and only) result.
generated = tokenizer.decode(out[0])
```