Multi-Stage Dockerfile
# Stage 1: Build environment
# Dependencies are installed into a virtual environment under /opt/venv so the
# runtime stage can copy one self-contained directory that is readable by a
# non-root user (a --user install lands in /root/.local, which is not).
FROM python:3.11-slim AS builder
WORKDIR /app
RUN python -m venv /opt/venv
# Put the venv first on PATH so the pip below installs into it.
ENV PATH=/opt/venv/bin:$PATH
# Copy only the manifest so this layer is rebuilt only when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Stage 2: Runtime
# Same base image as the builder, so the copied venv finds the same interpreter.
FROM python:3.11-slim
WORKDIR /app
# Create an unprivileged user with a fixed UID/GID and hand it the working
# directory, so the application can still write there without running as root.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home app \
    && chown app:app /app
COPY --from=builder /opt/venv /opt/venv
COPY --chown=app:app . .
ENV PATH=/opt/venv/bin:$PATH
# Drop root only after every step that needs it has run.
USER app
# Documents the port passed to uvicorn below; EXPOSE does not publish it.
EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
Result: the image shrinks from 2.1 GB to 620 MB.
GPU Support
# CUDA 12.1 runtime image with cuDNN 8 on Ubuntu 22.04.
# The tag spells out the patch version (12.1.1) rather than the short "12.1" form.
# NOTE(review): confirm this tag against NVIDIA's currently published tag list.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
Model Caching Strategy
Bake the model into the image at build time so that containers do not have to download it on first start:
# Load the model once during the build so the files it fetches are stored in
# this image layer.
RUN python -c "from transformers import AutoModel; \
    AutoModel.from_pretrained('bert-base-multilingual-cased')"