Spaces:
Sleeping
Sleeping
File size: 1,720 Bytes
0a13415 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import logging

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from swiftmind.predictor import create_model, create_tokenizer, predict
# Build the model and tokenizer once, at import time; the /predict handler
# passes these same module-level objects to ``predict`` on every request.
model = create_model()
tokenizer = create_tokenizer()
class PredictionRequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint.

    Every field is forwarded positionally to ``predict`` by the handler.
    """

    # Input text; required.
    prompt: str
    # Optional; 16 is used when the client omits it.
    max_new_tokens: int = 16
    # Optional; False is used when the client omits it.
    return_full_text: bool = False
class Prediction(BaseModel):
    """Response body returned by the ``/predict`` endpoint."""

    # The value produced by ``predict`` for the submitted request.
    content: str
# Application object that every middleware and route in this module is
# registered on; title, description and version are the API metadata
# handed to FastAPI.
app = FastAPI(
    title="SwiftMind API",
    description="Rest API for serving LLM model predictions",
    version="0.1.0",
)
# Cross-origin policy: requests from any origin, with any method and any
# header, are accepted.
#
# ``allow_origins`` takes a list of origins (like ``allow_methods`` and
# ``allow_headers`` below), so the wildcard is passed as ``["*"]`` rather
# than as a bare string.
#
# NOTE(review): the FastAPI CORS documentation discourages combining
# wildcard origins with ``allow_credentials=True``. Confirm whether
# credentialed cross-origin requests are really needed; if they are, list
# the trusted origins explicitly instead of using "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Header name -> value pairs stamped onto every HTTP response, in this order.
_SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
    "X-Frame-Options": "DENY",
    "Strict-Transport-Security": "max-age=63072000; includeSubDomains",
    "X-XSS-Protection": "1; mode=block",
}


@app.middleware("http")
async def security_headers(request: Request, call_next):
    """Add the fixed set of security headers to every response.

    The request is first handed to ``call_next``; the headers are then set
    on the response it produces, and that response is returned.
    """
    response = await call_next(request)
    for header_name, header_value in _SECURITY_HEADERS.items():
        response.headers[header_name] = header_value
    # NOTE: a Content-Security-Policy header ("default-src 'self'") is
    # currently left disabled.
    return response
@app.get("/heartbeat")
async def heartbeat():
    """Liveness probe: report that the service is running.

    Registered with ``app.get`` rather than ``app.route``. ``app.route`` is
    the raw Starlette decorator, which calls the endpoint with the incoming
    ``Request`` and expects a ``Response`` object back; this handler takes
    no arguments and returns a plain dict, so it needs FastAPI's path
    operation decorator to be invoked and serialised correctly.

    Returns:
        The dict ``{"status": "healthy"}``.
    """
    return {"status": "healthy"}
@app.post("/predict", response_model=Prediction, status_code=200)
async def make_prediction(request: PredictionRequest):
    """Run ``predict`` on the submitted request and wrap its result.

    Args:
        request: Validated request body; its ``prompt``,
            ``max_new_tokens`` and ``return_full_text`` fields are passed
            to ``predict`` together with the module-level model and
            tokenizer.

    Returns:
        A ``Prediction`` whose ``content`` is the value returned by
        ``predict``.

    Raises:
        HTTPException: With status 500 and the error text as ``detail``
            when ``predict`` or the construction of the response fails.
    """
    # NOTE(review): ``predict`` is called synchronously inside an async
    # handler. If it blocks for long (e.g. model inference), the event loop
    # is stalled for all other requests meanwhile - consider a plain ``def``
    # handler or a threadpool once the model's thread-safety is confirmed.
    try:
        prediction = predict(
            request.prompt,
            model,
            tokenizer,
            request.max_new_tokens,
            request.return_full_text,
        )
        return Prediction(content=prediction)
    except Exception as e:
        # Keep the traceback server-side: once converted to an
        # HTTPException the original failure would otherwise only surface
        # as a short message in the client response.
        logging.getLogger(__name__).exception("Prediction failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|