from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class PoetryGenerationService:
    def __init__(self):
        model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Adjust model name as needed

        # Initialize tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Llama tokenizers ship without a pad token, and tokenizer(..., padding=True)
        # raises without one; reuse the EOS token so padding works
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,  # Use float16 for efficiency
            device_map="auto",          # Automatically handle device placement
        )

        # Set model to evaluation mode (disables dropout)
        self.model.eval()
    async def generate_poem(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 50,
        max_length: int = 200,
        repetition_penalty: float = 1.1,
    ) -> str:
        try:
            # Tokenize the input prompt
            inputs = self.tokenizer(prompt, return_tensors="pt", padding=True)

            # Move input tensors to the same device as the model
            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

            # Generate text with the specified parameters
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    do_sample=True,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    max_length=max_length,  # Budget for prompt plus generated tokens combined
                    repetition_penalty=repetition_penalty,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                )

            # Decode only the newly generated tokens, dropping the echoed prompt
            prompt_length = inputs["input_ids"].shape[-1]
            generated_text = self.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True,
            )

            return generated_text
        except Exception as e:
            # Chain the original exception so the root cause stays in the traceback
            raise RuntimeError(f"Error generating poem: {e}") from e
    def __del__(self):
        # Best-effort cleanup; __del__ may run during interpreter shutdown,
        # so swallow errors rather than raising from a finalizer
        try:
            del self.model
            del self.tokenizer
            if torch.cuda.is_available():
                torch.cuda.empty_cache()  # Release cached GPU memory
        except Exception:
            pass
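

# Minimal usage sketch: generate_poem is async so it can be awaited from an
# async web framework; a standalone caller drives it with asyncio.run. This
# assumes an environment authenticated for the gated
# meta-llama/Llama-3.2-3B-Instruct weights and enough memory to load them.
if __name__ == "__main__":
    import asyncio

    async def main():
        service = PoetryGenerationService()
        poem = await service.generate_poem(
            "Write a four-line poem about the sea.",
            temperature=0.8,
        )
        print(poem)

    asyncio.run(main())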