Deploying ML Models with FastAPI: A Production Checklist

Minimal Production FastAPI

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np

# Application object; the route decorators further down register handlers on it.
app = FastAPI()
# The model is deserialized once, at import time, and shared by every request.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- 'model.pkl' must come from a trusted source. The relative path is
# resolved against the process working directory, not this file's location.
model = joblib.load('model.pkl')

class PredictRequest(BaseModel):
    # Request body schema for the /predict endpoint.

    # Feature values for a single sample; the handler reshapes them into a
    # one-row 2-D array before calling the model.
    # NOTE(review): no length constraint is declared here -- the expected
    # feature count depends on the loaded model; confirm against training.
    features: list[float]
    
class PredictResponse(BaseModel):
    # Response body schema for the /predict endpoint.

    # First element of model.predict(...) for the sample, converted to float.
    prediction: float
    # Largest value in the sample's predict_proba row.
    probability: float
    # Hard-coded label; it is not read from the loaded artifact.
    # NOTE(review): confirm it matches the deployed 'model.pkl'. Some
    # Pydantic v2 releases warn about field names starting with "model_"
    # (protected namespace) -- verify against the installed version.
    model_version: str = 'v1.2'

@app.post('/predict', response_model=PredictResponse)
async def predict(req: PredictRequest):
    """Score a single sample and return the prediction with its top probability.

    The feature list is reshaped into a one-row 2-D array, passed to
    ``model.predict`` and ``model.predict_proba``, and the first prediction
    plus the largest probability of that row are returned.

    Raises:
        HTTPException: 422 when ``features`` is empty, or when the model
            raises ``ValueError`` for the supplied input.
    """
    if not req.features:
        # An empty list would become a (1, 0) array and fail deep inside the
        # model as an unhandled 500; reject it as a client error instead.
        raise HTTPException(status_code=422, detail='features must not be empty')

    # One sample, len(features) columns.
    X = np.asarray(req.features, dtype=float).reshape(1, -1)
    try:
        # NOTE(review): inference runs synchronously inside the coroutine and
        # blocks the event loop for its duration; consider offloading it to a
        # worker thread if the model is slow.
        pred = model.predict(X)[0]
        prob = model.predict_proba(X)[0].max()
    except ValueError as exc:
        # Presumably raised by the estimator for input it cannot score (e.g.
        # a feature count that differs from training) -- report it to the
        # caller rather than surfacing an internal server error.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return PredictResponse(prediction=float(pred), probability=float(prob))

@app.get('/health')
def health():
    # Health probe: answers with a fixed payload and does not inspect the
    # model object. The module only finishes importing if the model load at
    # the top of the file succeeded.
    payload = {'status': 'ok', 'model': 'loaded'}
    return payload