import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load a GPT-2 model for general question answering
tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")

question = "List all US presidents in order of their presidency"
input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")

# Generate a response. do_sample=True is required for temperature/top_k/top_p
# to have any effect; pad_token_id is set explicitly because GPT-2 has no
# pad token, which otherwise triggers a warning during generation.
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_length=150,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)
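
# The original import also pulled in `pipeline` without using it. For
# reference, here is a minimal sketch of the same generation via the
# higher-level text-generation pipeline, which wraps the tokenizer and
# model loading shown above. Sampling parameters are passed through to
# generate(); model_kwargs forwards the cache_dir to from_pretrained().
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="gpt2-medium",
    model_kwargs={"cache_dir": "./cache"},
)
result = generator(
    f"Q: {question}\nA:",
    max_length=150,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    num_return_sequences=1,
)
print(result[0]["generated_text"])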