import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a GPT-2 model for general question answering
tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")

question = "List all US presidents in order of their presidency"
input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")

# Generate a response. do_sample=True is required for temperature/top_k/top_p
# to have any effect; pad_token_id is set explicitly because GPT-2 has no
# pad token, which otherwise triggers a warning during generation.
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_length=150,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)
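
# The original import also pulled in `pipeline` without using it. For
# reference, here is a minimal sketch of the same generation via the
# higher-level text-generation pipeline, which wraps the tokenizer and
# model loading shown above. Sampling parameters are passed through to
# generate(); model_kwargs forwards the cache_dir to from_pretrained().
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="gpt2-medium",
    model_kwargs={"cache_dir": "./cache"},
)
result = generator(
    f"Q: {question}\nA:",
    max_length=150,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    num_return_sequences=1,
)
print(result[0]["generated_text"])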