# Scriptr / app.py
# Author: Sidharthan
# Commit a892c2b: Changed the device configuration to solve issues
# app.py
import os
from typing import Optional

import torch
from fastapi import FastAPI, HTTPException
from peft import AutoPeftModelForCausalLM
from pydantic import BaseModel, Field
from transformers import AutoTokenizer
# NOTE(review): HF_HOME is assigned after the Hugging Face libraries have
# already been imported, so it may not redirect their default cache
# location. The cache directory is therefore also passed explicitly to both
# from_pretrained() calls below.
os.environ['HF_HOME'] = '/app/cache'
CACHE_DIR = '/app/cache'

app = FastAPI(title="Gemma Script Generator API")

# Access token for the Hugging Face Hub; None when HF_TOKEN is not set.
hf_token = os.getenv('HF_TOKEN')

# Load model and tokenizer
MODEL_NAME = "Sidharthan/gemma2_scripter"
try:
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        token=hf_token,  # `use_auth_token` is the deprecated spelling
        cache_dir=CACHE_DIR,
    )
    model = AutoPeftModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # No automatic device placement is requested here; the request
        # handler is responsible for putting inputs on the model's device.
        device_map=None,
        trust_remote_code=True,
        cache_dir=CACHE_DIR,
        # Same credentials as the tokenizer, so private/gated repositories
        # do not fail at the model-loading step.
        token=hf_token,
        # load_in_4bit=True
    )
except Exception as e:
    # Surface the failure in the logs, then abort startup: the service is
    # useless without a loaded model.
    print(f"Error loading model: {str(e)}")
    raise
class GenerationRequest(BaseModel):
    """Request body for the ``/generate`` endpoint.

    Only ``message`` is required. The remaining fields are sampling
    parameters that are forwarded to ``model.generate``; each one is
    range-checked so that invalid values are rejected during request
    validation instead of failing inside generation.
    """

    # Prompt text that is tokenized and fed to the model.
    message: str
    # Forwarded as `max_length`; must be positive.
    max_length: Optional[int] = Field(default=512, gt=0)
    # Sampling temperature; sampling requires a strictly positive value.
    temperature: Optional[float] = Field(default=0.7, gt=0)
    # Nucleus-sampling probability mass, in (0, 1].
    top_p: Optional[float] = Field(default=0.95, gt=0, le=1)
    # Top-k cutoff; negative values are rejected.
    top_k: Optional[int] = Field(default=50, ge=0)
    # Repetition penalty; must be strictly positive.
    repetition_penalty: Optional[float] = Field(default=1.2, gt=0)
class GenerationResponse(BaseModel):
    """Response body returned by the ``/generate`` endpoint."""

    # Decoded text produced for the request.
    generated_text: str
@app.post("/generate", response_model=GenerationResponse)
async def generate_script(request: GenerationRequest):
    """Generate text from the prompt in ``request.message``.

    The prompt is tokenized, sampled from with the parameters carried by the
    request, and the first returned sequence is decoded with special tokens
    stripped.

    Args:
        request: Prompt and sampling parameters.

    Returns:
        A ``GenerationResponse`` holding the decoded text.

    Raises:
        HTTPException: Status 500, with the underlying error message as the
            detail, when tokenization, generation, or decoding fails.
    """
    try:
        # Format prompt
        prompt = request.message

        # Tokenize input and place the tensors on whatever device the model
        # weights actually live on. Moving them to CUDA just because CUDA is
        # available breaks when the model itself was left on the CPU.
        encoded = tokenizer(prompt, return_tensors="pt")
        model_device = next(model.parameters()).device
        inputs = {name: tensor.to(model_device) for name, tensor in encoded.items()}

        # Some tokenizers define no pad token; fall back to EOS in that case.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        # Generate
        outputs = model.generate(
            **inputs,
            max_length=request.max_length,
            do_sample=True,
            temperature=request.temperature,
            top_p=request.top_p,
            top_k=request.top_k,
            repetition_penalty=request.repetition_penalty,
            num_return_sequences=1,
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Decode output
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return GenerationResponse(generated_text=generated_text)
    except Exception as e:
        # Endpoint boundary: convert any failure into an HTTP 500 while
        # keeping the original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/health")
async def health_check():
    """Liveness probe: unconditionally report the service as healthy."""
    status_payload = {"status": "healthy"}
    return status_payload
if __name__ == "__main__":
    # Start the ASGI server only when this file is executed as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)